{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 9.999255398361877, "eval_steps": 500, "global_step": 6710, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2.75, "completions/mean_length": 2.03125, "completions/min_length": 2.0, "epoch": 0.0014892032762472078, "grad_norm": 0.0, "kl": 0.0, "learning_rate": 1.4705882352941176e-08, "loss": 0.0, "memory(GiB)": 71.72, "reward": 0.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 0.0, "rewards/Format/std": 0.0, "step": 1, "train_speed(iter/s)": 0.006208 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4.5, "completions/mean_length": 2.1041667461395264, "completions/min_length": 2.0, "epoch": 0.0029784065524944155, "grad_norm": 8.93798870572509, "kl": 0.0, "learning_rate": 2.941176470588235e-08, "loss": -0.03398590907454491, "memory(GiB)": 71.72, "reward": 0.010416666977107525, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.010416666977107525, "rewards/CineAccuracyORM/std": 0.05103103443980217, "rewards/Format/mean": 0.0, "rewards/Format/std": 0.0, "step": 2, "train_speed(iter/s)": 0.010175 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2.0, "completions/mean_length": 2.0, "completions/min_length": 2.0, "epoch": 0.004467609828741623, "grad_norm": 0.0009366537576647069, "kl": 0.0001220703125, "learning_rate": 4.411764705882353e-08, "loss": 1.2200325727462769e-07, "memory(GiB)": 71.72, "reward": 0.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 0.0, "rewards/Format/std": 0.0, "step": 3, "train_speed(iter/s)": 0.014127 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2.0, "completions/mean_length": 2.0, "completions/min_length": 2.0, "epoch": 0.005956813104988831, "grad_norm": 0.00019065037548839618, "kl": 0.0, "learning_rate": 5.88235294117647e-08, "loss": 0.0, "memory(GiB)": 71.72, "reward": 0.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 0.0, "rewards/Format/std": 0.0, "step": 4, "train_speed(iter/s)": 0.017498 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2.0, "completions/mean_length": 2.0, "completions/min_length": 2.0, "epoch": 0.007446016381236039, "grad_norm": 0.0009069303413571532, "kl": 0.00020360946655273438, "learning_rate": 7.352941176470588e-08, "loss": 2.0333877159828262e-07, "memory(GiB)": 82.9, "reward": 0.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 0.0, "rewards/Format/std": 0.0, "step": 5, "train_speed(iter/s)": 0.018149 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3.25, "completions/mean_length": 2.125, "completions/min_length": 2.0, "epoch": 0.008935219657483246, "grad_norm": 8.905374371557405, "kl": 0.0005664825439453125, "learning_rate": 8.823529411764706e-08, "loss": -0.03398533910512924, "memory(GiB)": 82.9, "reward": 0.010416666977107525, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.010416666977107525, "rewards/CineAccuracyORM/std": 0.05103103443980217, "rewards/Format/mean": 0.0, "rewards/Format/std": 0.0, "step": 6, "train_speed(iter/s)": 0.019722 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3.5, "completions/mean_length": 2.1041667461395264, "completions/min_length": 2.0, "epoch": 0.010424422933730455, "grad_norm": 0.0018862691961928182, "kl": 0.00027751922607421875, "learning_rate": 1.0294117647058822e-07, "loss": 2.76789080544404e-07, "memory(GiB)": 82.9, "reward": 0.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 0.0, "rewards/Format/std": 0.0, "step": 7, "train_speed(iter/s)": 0.02184 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4.5, "completions/mean_length": 2.208333373069763, "completions/min_length": 2.0, "epoch": 0.011913626209977662, "grad_norm": 17.78765615675525, "kl": 8.630752563476562e-05, "learning_rate": 1.176470588235294e-07, "loss": -0.047109540551900864, "memory(GiB)": 82.9, "reward": 0.041666666977107525, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.041666666977107525, "rewards/CineAccuracyORM/std": 0.13548902794718742, "rewards/Format/mean": 0.0, "rewards/Format/std": 0.0, "step": 8, "train_speed(iter/s)": 0.023178 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 40.5, "completions/mean_length": 3.6562501192092896, "completions/min_length": 2.0, "epoch": 0.01340282948622487, "grad_norm": 10.58978919512171, "kl": 0.00013256072998046875, "learning_rate": 1.3235294117647057e-07, "loss": -0.03398577868938446, "memory(GiB)": 82.9, "reward": 0.010416666977107525, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.010416666977107525, "rewards/CineAccuracyORM/std": 0.05103103443980217, "rewards/Format/mean": 0.0, "rewards/Format/std": 0.0, "step": 9, "train_speed(iter/s)": 0.02374 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3.25, "completions/mean_length": 2.145833373069763, "completions/min_length": 2.0, "epoch": 0.014892032762472078, "grad_norm": 7.613458549825879, "kl": 0.00012230873107910156, "learning_rate": 1.4705882352941175e-07, "loss": -0.03398578613996506, "memory(GiB)": 82.9, "reward": 0.010416666977107525, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.010416666977107525, "rewards/CineAccuracyORM/std": 0.05103103443980217, "rewards/Format/mean": 0.0, "rewards/Format/std": 0.0, "step": 10, "train_speed(iter/s)": 0.024191 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4.25, "completions/mean_length": 2.3750000596046448, "completions/min_length": 2.0, "epoch": 0.016381236038719285, "grad_norm": 13.738991641983741, "kl": 0.0003712177276611328, "learning_rate": 1.6176470588235293e-07, "loss": -0.03812079504132271, "memory(GiB)": 82.9, "reward": 0.0520833358168602, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.0520833358168602, "rewards/CineAccuracyORM/std": 0.1037127822637558, "rewards/Format/mean": 0.0, "rewards/Format/std": 0.0, "step": 11, "train_speed(iter/s)": 0.025595 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4.5, "completions/mean_length": 2.302083373069763, "completions/min_length": 2.0, "epoch": 0.017870439314966492, "grad_norm": 13.533553153771084, "kl": 0.0005645751953125, "learning_rate": 1.764705882352941e-07, "loss": -0.03931673243641853, "memory(GiB)": 82.9, "reward": 0.03125, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.03125, "rewards/CineAccuracyORM/std": 0.08445799350738525, "rewards/Format/mean": 0.0, "rewards/Format/std": 0.0, "step": 12, "train_speed(iter/s)": 0.026301 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3.25, "completions/mean_length": 2.052083373069763, "completions/min_length": 2.0, "epoch": 0.0193596425912137, "grad_norm": 8.374398834817343, "kl": 0.00020313262939453125, "learning_rate": 1.9117647058823527e-07, "loss": -0.03398570418357849, "memory(GiB)": 82.9, "reward": 0.010416666977107525, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.010416666977107525, "rewards/CineAccuracyORM/std": 0.05103103443980217, "rewards/Format/mean": 0.0, "rewards/Format/std": 0.0, "step": 13, "train_speed(iter/s)": 0.026945 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2.5, "completions/mean_length": 2.0625, "completions/min_length": 2.0, "epoch": 0.02084884586746091, "grad_norm": 0.001051810303149859, "kl": 0.0, "learning_rate": 2.0588235294117645e-07, "loss": 0.0, "memory(GiB)": 82.9, "reward": 0.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 0.0, "rewards/Format/std": 0.0, "step": 14, "train_speed(iter/s)": 0.027006 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 66.25, "completions/mean_length": 6.333333373069763, "completions/min_length": 2.0, "epoch": 0.022338049143708117, "grad_norm": 0.8382214310804188, "kl": 0.00035262107849121094, "learning_rate": 2.2058823529411763e-07, "loss": 0.038756776601076126, "memory(GiB)": 82.9, "reward": 0.02083333395421505, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.02083333395421505, "rewards/CineAccuracyORM/std": 0.07058246433734894, "rewards/Format/mean": 0.0, "rewards/Format/std": 0.0, "step": 15, "train_speed(iter/s)": 0.027085 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 2.25, "completions/mean_length": 2.0104166865348816, "completions/min_length": 2.0, "epoch": 0.023827252419955324, "grad_norm": 0.0015859054264900933, "kl": 0.0001919269561767578, "learning_rate": 2.352941176470588e-07, "loss": 1.9204216528123652e-07, "memory(GiB)": 82.9, "reward": 0.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 0.0, "rewards/Format/std": 0.0, "step": 16, "train_speed(iter/s)": 0.027625 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3.25, "completions/mean_length": 2.1041666865348816, "completions/min_length": 2.0, "epoch": 0.02531645569620253, "grad_norm": 11.005828081576613, "kl": 0.00028204917907714844, "learning_rate": 2.5e-07, "loss": -0.05191751942038536, "memory(GiB)": 82.9, "reward": 0.02083333395421505, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.02083333395421505, "rewards/CineAccuracyORM/std": 0.07058246433734894, "rewards/Format/mean": 0.0, "rewards/Format/std": 0.0, "step": 17, "train_speed(iter/s)": 0.028115 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3.25, "completions/mean_length": 2.4166666865348816, "completions/min_length": 2.0, "epoch": 0.02680565897244974, "grad_norm": 9.348605249394643, "kl": 0.0012454986572265625, "learning_rate": 2.6470588235294114e-07, "loss": -0.03398466110229492, "memory(GiB)": 82.9, "reward": 0.02083333395421505, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.02083333395421505, "rewards/CineAccuracyORM/std": 0.07058246433734894, "rewards/Format/mean": 0.0, "rewards/Format/std": 0.0, "step": 18, "train_speed(iter/s)": 0.028604 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3.0, "completions/mean_length": 2.0625000596046448, "completions/min_length": 2.0, "epoch": 0.028294862248696945, "grad_norm": 0.0027618064688784355, "kl": 0.0006504058837890625, "learning_rate": 2.7941176470588235e-07, "loss": 6.509323497994046e-07, "memory(GiB)": 82.9, "reward": 0.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 0.0, "rewards/Format/std": 0.0, "step": 19, "train_speed(iter/s)": 0.029049 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3.0, "completions/mean_length": 2.0625000596046448, "completions/min_length": 2.0, "epoch": 0.029784065524944156, "grad_norm": 0.0037856058350700357, "kl": 0.0009508132934570312, "learning_rate": 2.941176470588235e-07, "loss": 9.494419828115497e-07, "memory(GiB)": 82.9, "reward": 0.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 0.0, "rewards/Format/std": 0.0, "step": 20, "train_speed(iter/s)": 0.029574 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3.25, "completions/mean_length": 2.15625, "completions/min_length": 2.0, "epoch": 0.03127326880119136, "grad_norm": 10.504277211644746, "kl": 0.0031332969665527344, "learning_rate": 3.088235294117647e-07, "loss": -0.013120587915182114, "memory(GiB)": 82.9, "reward": 0.03125, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.03125, "rewards/CineAccuracyORM/std": 0.08445799350738525, "rewards/Format/mean": 0.0, "rewards/Format/std": 0.0, "step": 21, "train_speed(iter/s)": 0.029574 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3.75, "completions/mean_length": 2.239583373069763, "completions/min_length": 2.0, "epoch": 0.03276247207743857, "grad_norm": 6.019153072057787, "kl": 0.0033708810806274414, "learning_rate": 3.2352941176470586e-07, "loss": -0.01312033087015152, "memory(GiB)": 82.9, "reward": 0.03125, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.03125, "rewards/CineAccuracyORM/std": 0.08445799350738525, "rewards/Format/mean": 0.0, "rewards/Format/std": 0.0, "step": 22, "train_speed(iter/s)": 0.030305 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3.25, "completions/mean_length": 2.2291666865348816, "completions/min_length": 2.0, "epoch": 0.03425167535368578, "grad_norm": 6.175364951567711, "kl": 0.004140377044677734, "learning_rate": 3.3823529411764707e-07, "loss": -0.024384688585996628, "memory(GiB)": 82.9, "reward": 0.02083333395421505, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.02083333395421505, "rewards/CineAccuracyORM/std": 0.07058246433734894, "rewards/Format/mean": 0.0, "rewards/Format/std": 0.0, "step": 23, "train_speed(iter/s)": 0.030619 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 4.5, "completions/mean_length": 2.4479166865348816, "completions/min_length": 2.0, "epoch": 0.035740878629932984, "grad_norm": 14.505310283867756, "kl": 0.014821529388427734, "learning_rate": 3.529411764705882e-07, "loss": -0.053265661001205444, "memory(GiB)": 82.9, "reward": 0.0520833358168602, "reward_std": 0.07400631904602051, "rewards/CineAccuracyORM/mean": 0.0520833358168602, "rewards/CineAccuracyORM/std": 0.1037127822637558, "rewards/Format/mean": 0.0, "rewards/Format/std": 0.0, "step": 24, "train_speed(iter/s)": 0.030929 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3.25, "completions/mean_length": 2.09375, "completions/min_length": 2.0, "epoch": 0.03723008190618019, "grad_norm": 0.008697945800291595, "kl": 0.006083250045776367, "learning_rate": 3.6764705882352943e-07, "loss": 6.099527581682196e-06, "memory(GiB)": 82.9, "reward": 0.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 0.0, "rewards/Format/std": 0.0, "step": 25, "train_speed(iter/s)": 0.030814 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 22.5, "completions/mean_length": 3.4166667461395264, "completions/min_length": 2.0, "epoch": 0.0387192851824274, "grad_norm": 32.664052998755785, "kl": 0.0335540771484375, "learning_rate": 3.8235294117647053e-07, "loss": -0.16422362625598907, "memory(GiB)": 82.9, "reward": 0.12500000279396772, "reward_std": 0.15546050108969212, "rewards/CineAccuracyORM/mean": 0.11458333861082792, "rewards/CineAccuracyORM/std": 0.25156543776392937, "rewards/Format/mean": 0.010416666977107525, "rewards/Format/std": 0.05103103443980217, "step": 26, "train_speed(iter/s)": 0.031014 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 3.25, "completions/mean_length": 2.270833373069763, "completions/min_length": 2.0, "epoch": 0.040208488458674606, "grad_norm": 11.738055463592154, "kl": 0.013711929321289062, "learning_rate": 3.9705882352941174e-07, "loss": -0.05197037383913994, "memory(GiB)": 87.24, "reward": 0.03125, "reward_std": 0.06803862750530243, "rewards/CineAccuracyORM/mean": 0.03125, "rewards/CineAccuracyORM/std": 0.08445799350738525, "rewards/Format/mean": 0.0, "rewards/Format/std": 0.0, "step": 27, "train_speed(iter/s)": 0.03111 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 5.75, "completions/mean_length": 2.5416667461395264, "completions/min_length": 2.0, "epoch": 0.04169769173492182, "grad_norm": 22.30338216981504, "kl": 0.0592041015625, "learning_rate": 4.117647058823529e-07, "loss": -0.07431589066982269, "memory(GiB)": 87.24, "reward": 0.031250000931322575, "reward_std": 0.08838834799826145, "rewards/CineAccuracyORM/mean": 0.031250000931322575, "rewards/CineAccuracyORM/std": 0.12161349877715111, "rewards/Format/mean": 0.0, "rewards/Format/std": 0.0, "step": 28, "train_speed(iter/s)": 0.031014 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 44.75, "completions/mean_length": 6.968750178813934, "completions/min_length": 2.0, "epoch": 0.04318689501116903, "grad_norm": 33.87919829200639, "kl": 0.226806640625, "learning_rate": 4.264705882352941e-07, "loss": -0.2259342074394226, "memory(GiB)": 87.24, "reward": 0.18750000186264515, "reward_std": 0.280769731849432, "rewards/CineAccuracyORM/mean": 0.16666666977107525, "rewards/CineAccuracyORM/std": 0.28228479623794556, "rewards/Format/mean": 0.02083333395421505, "rewards/Format/std": 0.10206206887960434, "step": 29, "train_speed(iter/s)": 0.031484 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 79.0, "completions/mean_length": 9.270833492279053, "completions/min_length": 2.0, "epoch": 0.044676098287416234, "grad_norm": 27.276580151607458, "kl": 0.30322265625, "learning_rate": 4.4117647058823526e-07, "loss": -0.1895902454853058, "memory(GiB)": 87.24, "reward": 0.22916667722165585, "reward_std": 0.3177132271230221, "rewards/CineAccuracyORM/mean": 0.21875000558793545, "rewards/CineAccuracyORM/std": 0.40782180428504944, "rewards/Format/mean": 0.010416666977107525, "rewards/Format/std": 0.05103103443980217, "step": 30, "train_speed(iter/s)": 0.031916 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 48.0, "completions/mean_length": 5.250000238418579, "completions/min_length": 2.0, "epoch": 0.04616530156366344, "grad_norm": 16.625392987588576, "kl": 0.258544921875, "learning_rate": 4.5588235294117646e-07, "loss": -0.06610026955604553, "memory(GiB)": 87.24, "reward": 0.05208333395421505, "reward_std": 0.10661446675658226, "rewards/CineAccuracyORM/mean": 0.05208333395421505, "rewards/CineAccuracyORM/std": 0.1550404578447342, "rewards/Format/mean": 0.0, "rewards/Format/std": 0.0, "step": 31, "train_speed(iter/s)": 0.031871 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 93.75, "completions/mean_length": 12.166667222976685, "completions/min_length": 2.0, "epoch": 0.04765450483991065, "grad_norm": 26.802967337658675, "kl": 0.2662353515625, "learning_rate": 4.705882352941176e-07, "loss": -0.19836318492889404, "memory(GiB)": 87.24, "reward": 0.20833333861082792, "reward_std": 0.3104106392711401, "rewards/CineAccuracyORM/mean": 0.1770833432674408, "rewards/CineAccuracyORM/std": 0.32350197434425354, "rewards/Format/mean": 0.031250000931322575, "rewards/Format/std": 0.1530931033194065, "step": 32, "train_speed(iter/s)": 0.032243 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 41.5, "completions/mean_length": 5.531250178813934, "completions/min_length": 2.0, "epoch": 0.049143708116157855, "grad_norm": 47.67596444392051, "kl": 0.3974609375, "learning_rate": 4.852941176470588e-07, "loss": -0.13863274455070496, "memory(GiB)": 87.24, "reward": 0.14583333395421505, "reward_std": 0.29847191646695137, "rewards/CineAccuracyORM/mean": 0.14583333395421505, "rewards/CineAccuracyORM/std": 0.3500799164175987, "rewards/Format/mean": 0.0, "rewards/Format/std": 0.0, "step": 33, "train_speed(iter/s)": 0.032663 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 77.75, "completions/mean_length": 9.791667103767395, "completions/min_length": 2.0, "epoch": 0.05063291139240506, "grad_norm": 31.15834173654221, "kl": 0.56005859375, "learning_rate": 5e-07, "loss": -0.13992325961589813, "memory(GiB)": 87.24, "reward": 0.35416667722165585, "reward_std": 0.3426096774637699, "rewards/CineAccuracyORM/mean": 0.34375001303851604, "rewards/CineAccuracyORM/std": 0.44963186979293823, "rewards/Format/mean": 0.010416666977107525, "rewards/Format/std": 0.05103103443980217, "step": 34, "train_speed(iter/s)": 0.032716 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 69.25, "completions/mean_length": 8.552083492279053, "completions/min_length": 2.0, "epoch": 0.05212211466865227, "grad_norm": 43.90868703605805, "kl": 0.5986328125, "learning_rate": 5.147058823529411e-07, "loss": -0.2771598994731903, "memory(GiB)": 87.24, "reward": 0.4791666828095913, "reward_std": 0.4218745306134224, "rewards/CineAccuracyORM/mean": 0.4479166679084301, "rewards/CineAccuracyORM/std": 0.47418417781591415, "rewards/Format/mean": 0.031250000931322575, "rewards/Format/std": 0.12161349877715111, "step": 35, "train_speed(iter/s)": 0.032802 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 71.5, "completions/mean_length": 9.145833492279053, "completions/min_length": 2.0, "epoch": 0.05361131794489948, "grad_norm": 16.534003428474378, "kl": 0.587890625, "learning_rate": 5.294117647058823e-07, "loss": -0.15340007841587067, "memory(GiB)": 87.24, "reward": 0.4270833432674408, "reward_std": 0.22830967605113983, "rewards/CineAccuracyORM/mean": 0.3958333358168602, "rewards/CineAccuracyORM/std": 0.4891481250524521, "rewards/Format/mean": 0.031250000931322575, "rewards/Format/std": 0.12161349877715111, "step": 36, "train_speed(iter/s)": 0.032457 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.25, "completions/mean_length": 17.593750476837158, "completions/min_length": 3.25, "epoch": 0.055100521221146684, "grad_norm": 7.184920915366972, "kl": 0.62548828125, "learning_rate": 5.441176470588234e-07, "loss": -0.1631685346364975, "memory(GiB)": 87.24, "reward": 0.5208333432674408, "reward_std": 0.4499228745698929, "rewards/CineAccuracyORM/mean": 0.4583333432674408, "rewards/CineAccuracyORM/std": 0.5053668543696404, "rewards/Format/mean": 0.06250000093132257, "rewards/Format/std": 0.20607149228453636, "step": 37, "train_speed(iter/s)": 0.032197 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.0, "completions/mean_length": 59.18750047683716, "completions/min_length": 7.0, "epoch": 0.05658972449739389, "grad_norm": 5.577510552837016, "kl": 0.47509765625, "learning_rate": 5.588235294117647e-07, "loss": -0.14381714165210724, "memory(GiB)": 87.24, "reward": 0.7291666865348816, "reward_std": 0.6225604377686977, "rewards/CineAccuracyORM/mean": 0.5312500074505806, "rewards/CineAccuracyORM/std": 0.49489787220954895, "rewards/Format/mean": 0.19791667442768812, "rewards/Format/std": 0.3814016915857792, "step": 38, "train_speed(iter/s)": 0.03209 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 377.75, "completions/mean_length": 77.41666793823242, "completions/min_length": 7.0, "epoch": 0.058078927773641105, "grad_norm": 6.436341563926351, "kl": 0.3111572265625, "learning_rate": 5.735294117647059e-07, "loss": -0.2745271921157837, "memory(GiB)": 87.24, "reward": 0.5729166865348816, "reward_std": 0.5852383524179459, "rewards/CineAccuracyORM/mean": 0.2812500074505806, "rewards/CineAccuracyORM/std": 0.43836020678281784, "rewards/Format/mean": 0.2916666753590107, "rewards/Format/std": 0.4552694782614708, "step": 39, "train_speed(iter/s)": 0.031854 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.75, "completions/mean_length": 41.82291793823242, "completions/min_length": 7.0, "epoch": 0.05956813104988831, "grad_norm": 8.066659782039059, "kl": 0.70166015625, "learning_rate": 5.88235294117647e-07, "loss": -0.19753596186637878, "memory(GiB)": 87.24, "reward": 0.5833333507180214, "reward_std": 0.5584849342703819, "rewards/CineAccuracyORM/mean": 0.4375000149011612, "rewards/CineAccuracyORM/std": 0.49907274544239044, "rewards/Format/mean": 0.1458333395421505, "rewards/Format/std": 0.3485904932022095, "step": 40, "train_speed(iter/s)": 0.031767 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 267.0, "completions/mean_length": 58.32291793823242, "completions/min_length": 7.0, "epoch": 0.06105733432613552, "grad_norm": 6.440957506135595, "kl": 0.349365234375, "learning_rate": 6.029411764705882e-07, "loss": -0.3079247772693634, "memory(GiB)": 87.24, "reward": 0.7604166865348816, "reward_std": 0.6644480228424072, "rewards/CineAccuracyORM/mean": 0.447916679084301, "rewards/CineAccuracyORM/std": 0.4749768152832985, "rewards/Format/mean": 0.3125000186264515, "rewards/Format/std": 0.44086509943008423, "step": 41, "train_speed(iter/s)": 0.031665 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 300.25, "completions/mean_length": 82.19792079925537, "completions/min_length": 7.0, "epoch": 0.06254653760238273, "grad_norm": 8.72430255083817, "kl": 0.3076171875, "learning_rate": 6.176470588235294e-07, "loss": -0.3846609592437744, "memory(GiB)": 87.24, "reward": 0.8333333432674408, "reward_std": 0.6320821866393089, "rewards/CineAccuracyORM/mean": 0.3750000149011612, "rewards/CineAccuracyORM/std": 0.4772557094693184, "rewards/Format/mean": 0.4583333432674408, "rewards/Format/std": 0.5044691935181618, "step": 42, "train_speed(iter/s)": 0.031491 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 225.0, "completions/mean_length": 81.60416984558105, "completions/min_length": 7.0, "epoch": 0.06403574087862993, "grad_norm": 4.980509619511958, "kl": 0.206787109375, "learning_rate": 6.323529411764706e-07, "loss": -0.2202533483505249, "memory(GiB)": 87.24, "reward": 1.083333358168602, "reward_std": 0.6379306763410568, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.49419892579317093, "rewards/Format/mean": 0.5833333432674408, "rewards/Format/std": 0.5017563551664352, "step": 43, "train_speed(iter/s)": 0.031652 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.25, "completions/mean_length": 76.64583778381348, "completions/min_length": 7.0, "epoch": 0.06552494415487714, "grad_norm": 4.95265871432255, "kl": 0.251708984375, "learning_rate": 6.470588235294117e-07, "loss": -0.18010585010051727, "memory(GiB)": 87.24, "reward": 1.0520833730697632, "reward_std": 0.6155994832515717, "rewards/CineAccuracyORM/mean": 0.5104166865348816, "rewards/CineAccuracyORM/std": 0.49582476168870926, "rewards/Format/mean": 0.5416666865348816, "rewards/Format/std": 0.49780942499637604, "step": 44, "train_speed(iter/s)": 0.03125 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 317.25, "completions/mean_length": 112.66666984558105, "completions/min_length": 7.0, "epoch": 0.06701414743112435, "grad_norm": 3.3258749496413618, "kl": 0.154052734375, "learning_rate": 6.617647058823529e-07, "loss": -0.08264011144638062, "memory(GiB)": 87.24, "reward": 1.0625000298023224, "reward_std": 0.632362425327301, "rewards/CineAccuracyORM/mean": 0.3541666828095913, "rewards/CineAccuracyORM/std": 0.4643966555595398, "rewards/Format/mean": 0.708333358168602, "rewards/Format/std": 0.4285014718770981, "step": 45, "train_speed(iter/s)": 0.030921 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 278.5, "completions/mean_length": 118.93750381469727, "completions/min_length": 17.0, "epoch": 0.06850335070737155, "grad_norm": 2.6875285477768966, "kl": 0.16162109375, "learning_rate": 6.764705882352941e-07, "loss": -0.13003258407115936, "memory(GiB)": 87.24, "reward": 1.260416716337204, "reward_std": 0.5558846741914749, "rewards/CineAccuracyORM/mean": 0.4270833507180214, "rewards/CineAccuracyORM/std": 0.4674193859100342, "rewards/Format/mean": 0.833333358168602, "rewards/Format/std": 0.36629024147987366, "step": 46, "train_speed(iter/s)": 0.031022 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 266.0, "completions/mean_length": 108.75000190734863, "completions/min_length": 17.5, "epoch": 0.06999255398361877, "grad_norm": 2.9116091800929467, "kl": 0.17724609375, "learning_rate": 6.911764705882353e-07, "loss": 0.08246521651744843, "memory(GiB)": 87.24, "reward": 1.2395833730697632, "reward_std": 0.5354249477386475, "rewards/CineAccuracyORM/mean": 0.4375000149011612, "rewards/CineAccuracyORM/std": 0.47619424760341644, "rewards/Format/mean": 0.802083358168602, "rewards/Format/std": 0.4046409949660301, "step": 47, "train_speed(iter/s)": 0.030919 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 283.0, "completions/mean_length": 119.60416984558105, "completions/min_length": 41.0, "epoch": 0.07148175725986597, "grad_norm": 1.954233385861661, "kl": 0.170166015625, "learning_rate": 7.058823529411765e-07, "loss": -0.00838075578212738, "memory(GiB)": 87.24, "reward": 1.197916716337204, "reward_std": 0.4235259722918272, "rewards/CineAccuracyORM/mean": 0.3333333395421505, "rewards/CineAccuracyORM/std": 0.4664374068379402, "rewards/Format/mean": 0.864583358168602, "rewards/Format/std": 0.3273683376610279, "step": 48, "train_speed(iter/s)": 0.030633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 305.0, "completions/mean_length": 110.93750381469727, "completions/min_length": 31.25, "epoch": 0.07297096053611318, "grad_norm": 2.1215907362285105, "kl": 0.18115234375, "learning_rate": 7.205882352941176e-07, "loss": 0.01781153678894043, "memory(GiB)": 87.24, "reward": 1.3125000298023224, "reward_std": 0.43161457777023315, "rewards/CineAccuracyORM/mean": 0.3750000074505806, "rewards/CineAccuracyORM/std": 0.46258050948381424, "rewards/Format/mean": 0.9375000149011612, "rewards/Format/std": 0.20607149228453636, "step": 49, "train_speed(iter/s)": 0.030307 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 274.25, "completions/mean_length": 120.43750381469727, "completions/min_length": 46.75, "epoch": 0.07446016381236038, "grad_norm": 2.122188008776941, "kl": 0.1768798828125, "learning_rate": 7.352941176470589e-07, "loss": 0.034451790153980255, "memory(GiB)": 87.24, "reward": 1.3958333730697632, "reward_std": 0.5304804742336273, "rewards/CineAccuracyORM/mean": 0.5104166865348816, "rewards/CineAccuracyORM/std": 0.49285712093114853, "rewards/Format/mean": 0.8854166865348816, "rewards/Format/std": 0.25156543776392937, "step": 50, "train_speed(iter/s)": 0.030087 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 259.0, "completions/mean_length": 116.62500381469727, "completions/min_length": 41.5, "epoch": 0.0759493670886076, "grad_norm": 2.916069795307611, "kl": 0.162841796875, "learning_rate": 7.5e-07, "loss": 0.12955620884895325, "memory(GiB)": 87.24, "reward": 1.3020833730697632, "reward_std": 0.5066225305199623, "rewards/CineAccuracyORM/mean": 0.4062500074505806, "rewards/CineAccuracyORM/std": 0.49464306980371475, "rewards/Format/mean": 0.8958333432674408, "rewards/Format/std": 0.3100809156894684, "step": 51, "train_speed(iter/s)": 0.030038 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 269.75, "completions/mean_length": 111.23958587646484, "completions/min_length": 44.25, "epoch": 0.0774385703648548, "grad_norm": 2.348758222749112, "kl": 0.197998046875, "learning_rate": 7.647058823529411e-07, "loss": 0.08394712209701538, "memory(GiB)": 87.24, "reward": 1.4791666865348816, "reward_std": 0.34826745092868805, "rewards/CineAccuracyORM/mean": 0.4895833432674408, "rewards/CineAccuracyORM/std": 0.4711115136742592, "rewards/Format/mean": 0.9895833432674408, "rewards/Format/std": 0.05103103443980217, "step": 52, "train_speed(iter/s)": 0.029831 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 241.25, "completions/mean_length": 97.13541793823242, "completions/min_length": 45.5, "epoch": 0.07892777364110201, "grad_norm": 2.3790782219732827, "kl": 0.221435546875, "learning_rate": 7.794117647058823e-07, "loss": 0.06365890800952911, "memory(GiB)": 87.24, "reward": 1.3854166865348816, "reward_std": 0.40411991253495216, "rewards/CineAccuracyORM/mean": 0.4166666828095913, "rewards/CineAccuracyORM/std": 0.4719540849328041, "rewards/Format/mean": 0.9687500298023224, "rewards/Format/std": 0.1530931033194065, "step": 53, "train_speed(iter/s)": 0.029483 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 243.5, "completions/mean_length": 102.51041793823242, "completions/min_length": 34.5, "epoch": 0.08041697691734921, "grad_norm": 2.285721840318849, "kl": 0.20849609375, "learning_rate": 7.941176470588235e-07, "loss": 0.03698644042015076, "memory(GiB)": 87.24, "reward": 1.4062500596046448, "reward_std": 0.4374202489852905, "rewards/CineAccuracyORM/mean": 0.447916679084301, "rewards/CineAccuracyORM/std": 0.48499228060245514, "rewards/Format/mean": 0.9583333730697632, "rewards/Format/std": 0.20412413775920868, "step": 54, "train_speed(iter/s)": 0.029299 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.5, "completions/mean_length": 92.31250381469727, "completions/min_length": 40.5, "epoch": 0.08190618019359643, "grad_norm": 1.825510180303823, "kl": 0.23095703125, "learning_rate": 8.088235294117646e-07, "loss": 0.03153917193412781, "memory(GiB)": 87.24, "reward": 1.4687500596046448, "reward_std": 0.23144849576056004, "rewards/CineAccuracyORM/mean": 0.4791666865348816, "rewards/CineAccuracyORM/std": 0.5007770657539368, "rewards/Format/mean": 0.9895833432674408, "rewards/Format/std": 0.05103103443980217, "step": 55, "train_speed(iter/s)": 0.029444 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 185.5, "completions/mean_length": 89.75000190734863, "completions/min_length": 39.0, "epoch": 0.08339538346984364, "grad_norm": 2.20090280585638, "kl": 0.23974609375, "learning_rate": 8.235294117647058e-07, "loss": 0.06573596596717834, "memory(GiB)": 87.24, "reward": 1.4687500298023224, "reward_std": 0.3931274712085724, "rewards/CineAccuracyORM/mean": 0.4895833432674408, "rewards/CineAccuracyORM/std": 0.5094214975833893, "rewards/Format/mean": 0.9791666865348816, "rewards/Format/std": 0.10206206887960434, "step": 56, "train_speed(iter/s)": 0.029315 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 284.0, "completions/mean_length": 89.85416793823242, "completions/min_length": 31.0, "epoch": 0.08488458674609084, "grad_norm": 1.7710078765997919, "kl": 0.248779296875, "learning_rate": 8.38235294117647e-07, "loss": -0.02633129432797432, "memory(GiB)": 87.24, "reward": 1.479166716337204, "reward_std": 0.2910727933049202, "rewards/CineAccuracyORM/mean": 0.4895833432674408, "rewards/CineAccuracyORM/std": 0.5004145503044128, "rewards/Format/mean": 0.9895833432674408, "rewards/Format/std": 0.05103103443980217, "step": 57, "train_speed(iter/s)": 0.029105 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 304.75, "completions/mean_length": 77.94791984558105, "completions/min_length": 33.5, "epoch": 0.08637379002233805, "grad_norm": 2.5505074910342254, "kl": 0.284912109375, "learning_rate": 8.529411764705882e-07, "loss": 0.04251826927065849, "memory(GiB)": 87.24, "reward": 1.572916716337204, "reward_std": 0.4383867047727108, "rewards/CineAccuracyORM/mean": 0.604166679084301, "rewards/CineAccuracyORM/std": 0.49164988845586777, "rewards/Format/mean": 0.96875, "rewards/Format/std": 0.08445799350738525, "step": 58, "train_speed(iter/s)": 0.029065 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.25, "completions/mean_length": 75.64583396911621, "completions/min_length": 36.5, "epoch": 0.08786299329858525, "grad_norm": 2.315894163693451, "kl": 0.265625, "learning_rate": 8.676470588235294e-07, "loss": -0.024034414440393448, "memory(GiB)": 87.24, "reward": 1.5208333730697632, "reward_std": 0.3917130194604397, "rewards/CineAccuracyORM/mean": 0.5208333507180214, "rewards/CineAccuracyORM/std": 0.4736350104212761, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 59, "train_speed(iter/s)": 0.028833 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 248.5, "completions/mean_length": 82.16666984558105, "completions/min_length": 35.25, "epoch": 0.08935219657483247, "grad_norm": 2.0054101650034406, "kl": 0.298828125, "learning_rate": 8.823529411764705e-07, "loss": 0.04970213770866394, "memory(GiB)": 87.24, "reward": 1.5312500596046448, "reward_std": 0.25108250975608826, "rewards/CineAccuracyORM/mean": 0.5416666772216558, "rewards/CineAccuracyORM/std": 0.4121210500597954, "rewards/Format/mean": 0.9895833432674408, "rewards/Format/std": 0.05103103443980217, "step": 60, "train_speed(iter/s)": 0.028735 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 267.75, "completions/mean_length": 88.59375190734863, "completions/min_length": 30.75, "epoch": 0.09084139985107967, "grad_norm": 2.394944800992097, "kl": 0.2880859375, "learning_rate": 8.970588235294118e-07, "loss": 0.06585128605365753, "memory(GiB)": 87.24, "reward": 1.4166666865348816, "reward_std": 0.44653329998254776, "rewards/CineAccuracyORM/mean": 0.4479166716337204, "rewards/CineAccuracyORM/std": 0.4991418570280075, "rewards/Format/mean": 0.9687500298023224, "rewards/Format/std": 0.1530931033194065, "step": 61, "train_speed(iter/s)": 0.028583 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 287.25, "completions/mean_length": 92.87500381469727, "completions/min_length": 33.75, "epoch": 0.09233060312732688, "grad_norm": 2.375533189486117, "kl": 0.276123046875, "learning_rate": 9.117647058823529e-07, "loss": 0.0773078203201294, "memory(GiB)": 87.24, "reward": 1.4270833730697632, "reward_std": 0.27214794233441353, "rewards/CineAccuracyORM/mean": 0.4687500149011612, "rewards/CineAccuracyORM/std": 0.49030808359384537, "rewards/Format/mean": 0.9583333432674408, "rewards/Format/std": 0.13548902794718742, "step": 62, "train_speed(iter/s)": 0.028551 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 272.0, "completions/mean_length": 73.65625286102295, "completions/min_length": 36.5, "epoch": 0.09381980640357408, "grad_norm": 1.4632338447524162, "kl": 0.319580078125, "learning_rate": 9.264705882352941e-07, "loss": 0.04957524687051773, "memory(GiB)": 87.24, "reward": 1.6145833730697632, "reward_std": 0.1645735576748848, "rewards/CineAccuracyORM/mean": 0.6354166939854622, "rewards/CineAccuracyORM/std": 0.4797830134630203, "rewards/Format/mean": 0.9791666716337204, "rewards/Format/std": 0.07058246433734894, "step": 63, "train_speed(iter/s)": 0.028157 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 262.75, "completions/mean_length": 90.89583587646484, "completions/min_length": 35.75, "epoch": 0.0953090096798213, "grad_norm": 2.8081112934237225, "kl": 0.2734375, "learning_rate": 9.411764705882352e-07, "loss": 0.07792927324771881, "memory(GiB)": 87.24, "reward": 1.5208334028720856, "reward_std": 0.5184001103043556, "rewards/CineAccuracyORM/mean": 0.5937500223517418, "rewards/CineAccuracyORM/std": 0.47723397612571716, "rewards/Format/mean": 0.9270833432674408, "rewards/Format/std": 0.21994702145457268, "step": 64, "train_speed(iter/s)": 0.028041 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 356.75, "completions/mean_length": 82.52083396911621, "completions/min_length": 35.5, "epoch": 0.0967982129560685, "grad_norm": 1.2429824661698152, "kl": 0.29833984375, "learning_rate": 9.558823529411764e-07, "loss": 0.0739765539765358, "memory(GiB)": 87.24, "reward": 1.7083333730697632, "reward_std": 0.1214444600045681, "rewards/CineAccuracyORM/mean": 0.7291666716337204, "rewards/CineAccuracyORM/std": 0.35865580290555954, "rewards/Format/mean": 0.9791666716337204, "rewards/Format/std": 0.07058246433734894, "step": 65, "train_speed(iter/s)": 0.027888 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 341.25, "completions/mean_length": 89.38541984558105, "completions/min_length": 33.75, "epoch": 0.09828741623231571, "grad_norm": 2.3403629580292638, "kl": 0.302734375, "learning_rate": 9.705882352941176e-07, "loss": 0.061508215963840485, "memory(GiB)": 87.24, "reward": 1.4895833730697632, "reward_std": 0.39479869417846203, "rewards/CineAccuracyORM/mean": 0.552083358168602, "rewards/CineAccuracyORM/std": 0.49556995928287506, "rewards/Format/mean": 0.9375000149011612, "rewards/Format/std": 0.21174739301204681, "step": 66, "train_speed(iter/s)": 0.027744 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 202.25, "completions/mean_length": 66.08333587646484, "completions/min_length": 33.75, "epoch": 0.09977661950856292, "grad_norm": 2.7900096437962216, "kl": 0.37548828125, "learning_rate": 9.85294117647059e-07, "loss": 0.09719212353229523, "memory(GiB)": 87.24, "reward": 1.5312500298023224, "reward_std": 0.3061639852821827, "rewards/CineAccuracyORM/mean": 0.5520833432674408, "rewards/CineAccuracyORM/std": 0.46123870462179184, "rewards/Format/mean": 0.9791666865348816, "rewards/Format/std": 0.10206206887960434, "step": 67, "train_speed(iter/s)": 0.027559 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 353.5, "completions/mean_length": 81.56250190734863, "completions/min_length": 33.5, "epoch": 0.10126582278481013, "grad_norm": 1.9377223044804968, "kl": 0.36474609375, "learning_rate": 1e-06, "loss": 0.08560063689947128, "memory(GiB)": 87.24, "reward": 1.447916716337204, "reward_std": 0.1588566154241562, "rewards/CineAccuracyORM/mean": 0.4687500149011612, "rewards/CineAccuracyORM/std": 0.464870348572731, "rewards/Format/mean": 0.9791666716337204, "rewards/Format/std": 0.07058246433734894, "step": 68, "train_speed(iter/s)": 0.02752 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 282.5, "completions/mean_length": 87.58333587646484, "completions/min_length": 32.5, "epoch": 0.10275502606105734, "grad_norm": 2.3562933400513097, "kl": 0.2998046875, "learning_rate": 9.999999440703626e-07, "loss": 0.04399222880601883, "memory(GiB)": 87.24, "reward": 1.4270834028720856, "reward_std": 0.34541876427829266, "rewards/CineAccuracyORM/mean": 0.4687500298023224, "rewards/CineAccuracyORM/std": 0.4741317629814148, "rewards/Format/mean": 0.9583333432674408, "rewards/Format/std": 0.13548902794718742, "step": 69, "train_speed(iter/s)": 0.027508 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.75, "completions/mean_length": 63.02083492279053, "completions/min_length": 30.25, "epoch": 0.10424422933730454, "grad_norm": 2.1657233816634283, "kl": 0.359375, "learning_rate": 9.999997762814631e-07, "loss": 0.018564578145742416, "memory(GiB)": 87.24, "reward": 1.4375000298023224, "reward_std": 0.27284668199718, "rewards/CineAccuracyORM/mean": 0.447916679084301, "rewards/CineAccuracyORM/std": 0.44977453351020813, "rewards/Format/mean": 0.9895833432674408, "rewards/Format/std": 0.05103103443980217, "step": 70, "train_speed(iter/s)": 0.027452 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 270.25, "completions/mean_length": 70.21875190734863, "completions/min_length": 32.25, "epoch": 0.10573343261355175, "grad_norm": 2.839825356229611, "kl": 0.3154296875, "learning_rate": 9.999994966333386e-07, "loss": 0.05302850902080536, "memory(GiB)": 87.24, "reward": 1.510416716337204, "reward_std": 0.39840296655893326, "rewards/CineAccuracyORM/mean": 0.5208333432674408, "rewards/CineAccuracyORM/std": 0.45247404277324677, "rewards/Format/mean": 0.9895833432674408, "rewards/Format/std": 0.05103103443980217, "step": 71, "train_speed(iter/s)": 0.027446 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.75, "completions/mean_length": 63.48958492279053, "completions/min_length": 28.25, "epoch": 0.10722263588979895, "grad_norm": 2.6855620293841533, "kl": 0.36474609375, "learning_rate": 9.999991051260525e-07, "loss": 0.06182008981704712, "memory(GiB)": 87.24, "reward": 1.572916716337204, "reward_std": 0.4120003506541252, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.4845583364367485, "rewards/Format/mean": 0.9895833432674408, "rewards/Format/std": 0.05103103443980217, "step": 72, "train_speed(iter/s)": 0.027363 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 90.75, "completions/mean_length": 57.20833492279053, "completions/min_length": 30.5, "epoch": 0.10871183916604617, "grad_norm": 2.2586764827211216, "kl": 0.39013671875, "learning_rate": 9.999986017596914e-07, "loss": -0.01694866642355919, "memory(GiB)": 87.24, "reward": 1.5312500298023224, "reward_std": 0.31597577407956123, "rewards/CineAccuracyORM/mean": 0.5312500149011612, "rewards/CineAccuracyORM/std": 0.49825356900691986, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 73, "train_speed(iter/s)": 0.027539 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.75, "completions/mean_length": 58.250000953674316, "completions/min_length": 34.0, "epoch": 0.11020104244229337, "grad_norm": 2.388635344869824, "kl": 0.35791015625, "learning_rate": 9.999979865343688e-07, "loss": 0.009060273878276348, "memory(GiB)": 87.24, "reward": 1.5312500596046448, "reward_std": 0.3009015694260597, "rewards/CineAccuracyORM/mean": 0.5416666865348816, "rewards/CineAccuracyORM/std": 0.4398057460784912, "rewards/Format/mean": 0.9895833432674408, "rewards/Format/std": 0.05103103443980217, "step": 74, "train_speed(iter/s)": 0.027601 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.5, "completions/mean_length": 57.072916984558105, "completions/min_length": 29.5, "epoch": 0.11169024571854058, "grad_norm": 2.518981980335559, "kl": 0.40576171875, "learning_rate": 9.999972594502218e-07, "loss": 0.00646460335701704, "memory(GiB)": 87.24, "reward": 1.4375000298023224, "reward_std": 0.2872287090867758, "rewards/CineAccuracyORM/mean": 0.4375000074505806, "rewards/CineAccuracyORM/std": 0.4989525154232979, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 75, "train_speed(iter/s)": 0.027657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.75, "completions/mean_length": 56.76041793823242, "completions/min_length": 32.75, "epoch": 0.11317944899478778, "grad_norm": 2.4676958236366513, "kl": 0.3974609375, "learning_rate": 9.99996420507413e-07, "loss": -0.0361456498503685, "memory(GiB)": 87.24, "reward": 1.5625000298023224, "reward_std": 0.31213822588324547, "rewards/CineAccuracyORM/mean": 0.5625000149011612, "rewards/CineAccuracyORM/std": 0.4246547743678093, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 76, "train_speed(iter/s)": 0.027614 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.5, "completions/mean_length": 62.43750190734863, "completions/min_length": 33.5, "epoch": 0.114668652271035, "grad_norm": 2.0063459310427314, "kl": 0.34765625, "learning_rate": 9.999954697061304e-07, "loss": -0.012269075959920883, "memory(GiB)": 87.24, "reward": 1.666666716337204, "reward_std": 0.2328629419207573, "rewards/CineAccuracyORM/mean": 0.6666667014360428, "rewards/CineAccuracyORM/std": 0.4673642963171005, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 77, "train_speed(iter/s)": 0.027575 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 102.75, "completions/mean_length": 55.98958492279053, "completions/min_length": 29.0, "epoch": 0.11615785554728221, "grad_norm": 2.5806609350108127, "kl": 0.41455078125, "learning_rate": 9.999944070465867e-07, "loss": -0.04392392933368683, "memory(GiB)": 87.24, "reward": 1.479166716337204, "reward_std": 0.3499983586370945, "rewards/CineAccuracyORM/mean": 0.479166679084301, "rewards/CineAccuracyORM/std": 0.4854559972882271, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 78, "train_speed(iter/s)": 0.027594 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 99.5, "completions/mean_length": 52.65625, "completions/min_length": 27.75, "epoch": 0.11764705882352941, "grad_norm": 2.2791112396913906, "kl": 0.408203125, "learning_rate": 9.999932325290195e-07, "loss": -0.0160086490213871, "memory(GiB)": 87.24, "reward": 1.5625000596046448, "reward_std": 0.22233543917536736, "rewards/CineAccuracyORM/mean": 0.5625000111758709, "rewards/CineAccuracyORM/std": 0.40924668312072754, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 79, "train_speed(iter/s)": 0.027649 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 88.75, "completions/mean_length": 52.18750190734863, "completions/min_length": 27.25, "epoch": 0.11913626209977662, "grad_norm": 1.8710803291566702, "kl": 0.3896484375, "learning_rate": 9.999919461536913e-07, "loss": -0.0009642603690735996, "memory(GiB)": 87.24, "reward": 1.4791666865348816, "reward_std": 0.18831939809024334, "rewards/CineAccuracyORM/mean": 0.479166679084301, "rewards/CineAccuracyORM/std": 0.48736217617988586, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 80, "train_speed(iter/s)": 0.027645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 95.5, "completions/mean_length": 57.95833492279053, "completions/min_length": 32.5, "epoch": 0.12062546537602382, "grad_norm": 1.7395559798556535, "kl": 0.35107421875, "learning_rate": 9.999905479208905e-07, "loss": -0.01741936057806015, "memory(GiB)": 87.24, "reward": 1.5208333730697632, "reward_std": 0.15571126714348793, "rewards/CineAccuracyORM/mean": 0.5208333507180214, "rewards/CineAccuracyORM/std": 0.4700479060411453, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 81, "train_speed(iter/s)": 0.027624 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 104.75, "completions/mean_length": 63.82291889190674, "completions/min_length": 31.0, "epoch": 0.12211466865227104, "grad_norm": 2.3732214452088747, "kl": 0.32275390625, "learning_rate": 9.999890378309295e-07, "loss": -0.005933951120823622, "memory(GiB)": 87.24, "reward": 1.5000000298023224, "reward_std": 0.34685954079031944, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4772557094693184, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 82, "train_speed(iter/s)": 0.027738 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.5, "completions/mean_length": 61.18750190734863, "completions/min_length": 33.75, "epoch": 0.12360387192851824, "grad_norm": 2.4284568777504307, "kl": 0.3310546875, "learning_rate": 9.99987415884146e-07, "loss": -0.015407639555633068, "memory(GiB)": 87.24, "reward": 1.572916716337204, "reward_std": 0.3289499133825302, "rewards/CineAccuracyORM/mean": 0.5729166865348816, "rewards/CineAccuracyORM/std": 0.46997862309217453, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 83, "train_speed(iter/s)": 0.027797 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 98.5, "completions/mean_length": 67.12500381469727, "completions/min_length": 40.5, "epoch": 0.12509307520476545, "grad_norm": 1.7008087485515648, "kl": 0.35693359375, "learning_rate": 9.999856820809034e-07, "loss": 0.01816430687904358, "memory(GiB)": 87.24, "reward": 1.7500000298023224, "reward_std": 0.20198571309447289, "rewards/CineAccuracyORM/mean": 0.7500000298023224, "rewards/CineAccuracyORM/std": 0.35805535316467285, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 84, "train_speed(iter/s)": 0.027856 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 96.75, "completions/mean_length": 62.57291889190674, "completions/min_length": 38.25, "epoch": 0.12658227848101267, "grad_norm": 1.63237644818562, "kl": 0.3349609375, "learning_rate": 9.99983836421589e-07, "loss": 0.00961359590291977, "memory(GiB)": 87.24, "reward": 1.3958333730697632, "reward_std": 0.1497435588389635, "rewards/CineAccuracyORM/mean": 0.3958333432674408, "rewards/CineAccuracyORM/std": 0.4681183323264122, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 85, "train_speed(iter/s)": 0.027799 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 99.25, "completions/mean_length": 58.88541793823242, "completions/min_length": 30.25, "epoch": 0.12807148175725985, "grad_norm": 2.6082661783061196, "kl": 0.34521484375, "learning_rate": 9.999818789066163e-07, "loss": 0.0042117564007639885, "memory(GiB)": 87.24, "reward": 1.4375000596046448, "reward_std": 0.36751921474933624, "rewards/CineAccuracyORM/mean": 0.4375000149011612, "rewards/CineAccuracyORM/std": 0.48599863797426224, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 86, "train_speed(iter/s)": 0.027805 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 106.0, "completions/mean_length": 65.68750190734863, "completions/min_length": 35.0, "epoch": 0.12956068503350707, "grad_norm": 2.3127002926588327, "kl": 0.31298828125, "learning_rate": 9.999798095364227e-07, "loss": 0.016584649682044983, "memory(GiB)": 87.24, "reward": 1.4270833432674408, "reward_std": 0.2858142610639334, "rewards/CineAccuracyORM/mean": 0.4270833358168602, "rewards/CineAccuracyORM/std": 0.5022004991769791, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 87, "train_speed(iter/s)": 0.027716 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 116.75, "completions/mean_length": 65.69791793823242, "completions/min_length": 39.25, "epoch": 0.13104988830975428, "grad_norm": 1.9273735481422896, "kl": 0.341796875, "learning_rate": 9.999776283114713e-07, "loss": 0.002256533829495311, "memory(GiB)": 87.24, "reward": 1.5104166865348816, "reward_std": 0.17920634150505066, "rewards/CineAccuracyORM/mean": 0.5104166772216558, "rewards/CineAccuracyORM/std": 0.32011883705854416, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 88, "train_speed(iter/s)": 0.027769 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.25, "completions/mean_length": 66.48958492279053, "completions/min_length": 37.25, "epoch": 0.1325390915860015, "grad_norm": 1.666198863679128, "kl": 0.31494140625, "learning_rate": 9.999753352322502e-07, "loss": 0.012014600448310375, "memory(GiB)": 87.24, "reward": 1.5625000596046448, "reward_std": 0.1883194036781788, "rewards/CineAccuracyORM/mean": 0.5625000223517418, "rewards/CineAccuracyORM/std": 0.46184761822223663, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 89, "train_speed(iter/s)": 0.027813 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 91.0, "completions/mean_length": 63.50000286102295, "completions/min_length": 39.0, "epoch": 0.1340282948622487, "grad_norm": 2.1459690821877455, "kl": 0.33740234375, "learning_rate": 9.999729302992724e-07, "loss": -0.0019394507398828864, "memory(GiB)": 87.24, "reward": 1.5625000298023224, "reward_std": 0.2714387737214565, "rewards/CineAccuracyORM/mean": 0.5625000149011612, "rewards/CineAccuracyORM/std": 0.47428806871175766, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 90, "train_speed(iter/s)": 0.027952 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 91.25, "completions/mean_length": 62.04166889190674, "completions/min_length": 35.0, "epoch": 0.1355174981384959, "grad_norm": 1.7362974626395538, "kl": 0.36083984375, "learning_rate": 9.999704135130757e-07, "loss": 0.004473376553505659, "memory(GiB)": 87.24, "reward": 1.5104166865348816, "reward_std": 0.18517404980957508, "rewards/CineAccuracyORM/mean": 0.5104166716337204, "rewards/CineAccuracyORM/std": 0.5049133375287056, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 91, "train_speed(iter/s)": 0.027837 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 104.75, "completions/mean_length": 62.47916889190674, "completions/min_length": 33.25, "epoch": 0.1370067014147431, "grad_norm": 1.7593730714264366, "kl": 0.31640625, "learning_rate": 9.999677848742235e-07, "loss": -0.022574380040168762, "memory(GiB)": 87.24, "reward": 1.8333333730697632, "reward_std": 0.20795341581106186, "rewards/CineAccuracyORM/mean": 0.833333358168602, "rewards/CineAccuracyORM/std": 0.3107326030731201, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 92, "train_speed(iter/s)": 0.027966 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.25, "completions/mean_length": 61.36458492279053, "completions/min_length": 36.0, "epoch": 0.13849590469099032, "grad_norm": 1.7973706136223782, "kl": 0.35205078125, "learning_rate": 9.999650443833036e-07, "loss": 0.015058008953928947, "memory(GiB)": 87.24, "reward": 1.5312500298023224, "reward_std": 0.20480806566774845, "rewards/CineAccuracyORM/mean": 0.5312500074505806, "rewards/CineAccuracyORM/std": 0.45572202652692795, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 93, "train_speed(iter/s)": 0.02794 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 92.5, "completions/mean_length": 58.38541793823242, "completions/min_length": 35.75, "epoch": 0.13998510796723754, "grad_norm": 2.1229562804574886, "kl": 0.34423828125, "learning_rate": 9.999621920409293e-07, "loss": 0.022112365812063217, "memory(GiB)": 87.24, "reward": 1.4687500298023224, "reward_std": 0.19955607876181602, "rewards/CineAccuracyORM/mean": 0.4687500149011612, "rewards/CineAccuracyORM/std": 0.4595789238810539, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 94, "train_speed(iter/s)": 0.027914 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 88.0, "completions/mean_length": 59.69791793823242, "completions/min_length": 33.75, "epoch": 0.14147431124348472, "grad_norm": 2.1074207645288516, "kl": 0.36181640625, "learning_rate": 9.999592278477387e-07, "loss": 0.006839253008365631, "memory(GiB)": 87.24, "reward": 1.760416716337204, "reward_std": 0.28792744502425194, "rewards/CineAccuracyORM/mean": 0.7604166865348816, "rewards/CineAccuracyORM/std": 0.4097755327820778, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 95, "train_speed(iter/s)": 0.027968 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 93.5, "completions/mean_length": 58.947916984558105, "completions/min_length": 34.0, "epoch": 0.14296351451973194, "grad_norm": 1.8677438494313598, "kl": 0.37646484375, "learning_rate": 9.999561518043946e-07, "loss": 0.015833253040909767, "memory(GiB)": 87.24, "reward": 1.6458333432674408, "reward_std": 0.21463683806359768, "rewards/CineAccuracyORM/mean": 0.6458333432674408, "rewards/CineAccuracyORM/std": 0.4309536889195442, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 96, "train_speed(iter/s)": 0.028017 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 97.0, "completions/mean_length": 59.41666793823242, "completions/min_length": 28.5, "epoch": 0.14445271779597915, "grad_norm": 2.1253876582067925, "kl": 0.3388671875, "learning_rate": 9.999529639115857e-07, "loss": -0.0005810856819152832, "memory(GiB)": 87.24, "reward": 1.6562500298023224, "reward_std": 0.25635802187025547, "rewards/CineAccuracyORM/mean": 0.6562500149011612, "rewards/CineAccuracyORM/std": 0.46123870462179184, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 97, "train_speed(iter/s)": 0.028069 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 107.75, "completions/mean_length": 60.343750953674316, "completions/min_length": 35.25, "epoch": 0.14594192107222637, "grad_norm": 1.7879926996618039, "kl": 0.359375, "learning_rate": 9.999496641700246e-07, "loss": 0.001552792964503169, "memory(GiB)": 87.24, "reward": 1.6875000298023224, "reward_std": 0.21109222620725632, "rewards/CineAccuracyORM/mean": 0.6875000074505806, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 98, "train_speed(iter/s)": 0.028108 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 85.5, "completions/mean_length": 57.91666889190674, "completions/min_length": 34.25, "epoch": 0.14743112434847358, "grad_norm": 1.8252937692231834, "kl": 0.3857421875, "learning_rate": 9.9994625258045e-07, "loss": 0.014478671364486217, "memory(GiB)": 87.24, "reward": 1.5625000298023224, "reward_std": 0.22830315306782722, "rewards/CineAccuracyORM/mean": 0.5625000223517418, "rewards/CineAccuracyORM/std": 0.44265104830265045, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 99, "train_speed(iter/s)": 0.028162 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 88.5, "completions/mean_length": 58.26041793823242, "completions/min_length": 34.5, "epoch": 0.14892032762472077, "grad_norm": 1.5252787845267932, "kl": 0.34765625, "learning_rate": 9.99942729143625e-07, "loss": -0.00010787199425976723, "memory(GiB)": 87.24, "reward": 1.7500000298023224, "reward_std": 0.14204495213925838, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.34605155140161514, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 100, "train_speed(iter/s)": 0.028209 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 93.75, "completions/mean_length": 58.08333492279053, "completions/min_length": 35.0, "epoch": 0.15040953090096798, "grad_norm": 1.8626545129075989, "kl": 0.36181640625, "learning_rate": 9.99939093860338e-07, "loss": 0.017284924164414406, "memory(GiB)": 87.24, "reward": 1.6145833730697632, "reward_std": 0.19287265837192535, "rewards/CineAccuracyORM/mean": 0.6145833507180214, "rewards/CineAccuracyORM/std": 0.4605826362967491, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 101, "train_speed(iter/s)": 0.028258 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 85.0, "completions/mean_length": 59.03125190734863, "completions/min_length": 37.0, "epoch": 0.1518987341772152, "grad_norm": 1.9225584641373488, "kl": 0.35986328125, "learning_rate": 9.999353467314018e-07, "loss": -0.01579505205154419, "memory(GiB)": 87.24, "reward": 1.6458333730697632, "reward_std": 0.20795341208577156, "rewards/CineAccuracyORM/mean": 0.6458333507180214, "rewards/CineAccuracyORM/std": 0.47428806871175766, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 102, "train_speed(iter/s)": 0.028307 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 95.0, "completions/mean_length": 58.11458492279053, "completions/min_length": 33.5, "epoch": 0.1533879374534624, "grad_norm": 1.9433636027283927, "kl": 0.3359375, "learning_rate": 9.999314877576552e-07, "loss": -0.006546442862600088, "memory(GiB)": 87.24, "reward": 1.5833333730697632, "reward_std": 0.14974356442689896, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.48048195987939835, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 103, "train_speed(iter/s)": 0.028351 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 86.25, "completions/mean_length": 56.30208396911621, "completions/min_length": 32.0, "epoch": 0.1548771407297096, "grad_norm": 1.8578939052904277, "kl": 0.34375, "learning_rate": 9.999275169399612e-07, "loss": -0.005870466586202383, "memory(GiB)": 87.24, "reward": 1.604166716337204, "reward_std": 0.15571125969290733, "rewards/CineAccuracyORM/mean": 0.604166679084301, "rewards/CineAccuracyORM/std": 0.4717390313744545, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 104, "train_speed(iter/s)": 0.028354 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 105.75, "completions/mean_length": 60.572916984558105, "completions/min_length": 32.75, "epoch": 0.1563663440059568, "grad_norm": 1.9746538817160557, "kl": 0.37548828125, "learning_rate": 9.999234342792084e-07, "loss": 0.009743280708789825, "memory(GiB)": 87.24, "reward": 1.6770833730697632, "reward_std": 0.23144849576056004, "rewards/CineAccuracyORM/mean": 0.677083358168602, "rewards/CineAccuracyORM/std": 0.37717172876000404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 105, "train_speed(iter/s)": 0.028398 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 91.25, "completions/mean_length": 56.11458492279053, "completions/min_length": 32.25, "epoch": 0.15785554728220402, "grad_norm": 1.4944305537075333, "kl": 0.34130859375, "learning_rate": 9.9991923977631e-07, "loss": -0.0007192387711256742, "memory(GiB)": 87.24, "reward": 1.8854166865348816, "reward_std": 0.11258216388523579, "rewards/CineAccuracyORM/mean": 0.8854166865348816, "rewards/CineAccuracyORM/std": 0.25678587332367897, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 106, "train_speed(iter/s)": 0.028451 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 88.25, "completions/mean_length": 55.656250953674316, "completions/min_length": 31.0, "epoch": 0.15934475055845124, "grad_norm": 1.5592062737078751, "kl": 0.4287109375, "learning_rate": 9.999149334322045e-07, "loss": 0.0023335497826337814, "memory(GiB)": 87.24, "reward": 1.635416716337204, "reward_std": 0.08838834799826145, "rewards/CineAccuracyORM/mean": 0.6354166865348816, "rewards/CineAccuracyORM/std": 0.24199935421347618, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 107, "train_speed(iter/s)": 0.028493 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 80.75, "completions/mean_length": 53.42708492279053, "completions/min_length": 29.5, "epoch": 0.16083395383469842, "grad_norm": 2.0550836041603358, "kl": 0.37939453125, "learning_rate": 9.999105152478553e-07, "loss": -0.00656731566414237, "memory(GiB)": 87.24, "reward": 1.5000000596046448, "reward_std": 0.15642697550356388, "rewards/CineAccuracyORM/mean": 0.5000000223517418, "rewards/CineAccuracyORM/std": 0.47292453050613403, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 108, "train_speed(iter/s)": 0.028448 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 87.75, "completions/mean_length": 54.33333396911621, "completions/min_length": 34.5, "epoch": 0.16232315711094564, "grad_norm": 1.3471050611853017, "kl": 0.39404296875, "learning_rate": 9.999059852242507e-07, "loss": 0.00039348710561171174, "memory(GiB)": 87.24, "reward": 1.6250000596046448, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.6250000149011612, "rewards/CineAccuracyORM/std": 0.4888460487127304, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 109, "train_speed(iter/s)": 0.028374 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 91.0, "completions/mean_length": 53.27083492279053, "completions/min_length": 33.25, "epoch": 0.16381236038719285, "grad_norm": 2.1236140650819744, "kl": 0.40283203125, "learning_rate": 9.999013433624042e-07, "loss": 0.0008951550116762519, "memory(GiB)": 87.24, "reward": 1.666666716337204, "reward_std": 0.19428709708154202, "rewards/CineAccuracyORM/mean": 0.6666666939854622, "rewards/CineAccuracyORM/std": 0.40787915512919426, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 110, "train_speed(iter/s)": 0.028417 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 85.5, "completions/mean_length": 51.6875, "completions/min_length": 29.75, "epoch": 0.16530156366344007, "grad_norm": 1.050673373466346, "kl": 0.4140625, "learning_rate": 9.998965896633542e-07, "loss": -0.009534396231174469, "memory(GiB)": 87.24, "reward": 1.8958333730697632, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 111, "train_speed(iter/s)": 0.028396 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 85.25, "completions/mean_length": 53.70833396911621, "completions/min_length": 30.5, "epoch": 0.16679076693968728, "grad_norm": 2.0430603898305133, "kl": 0.39208984375, "learning_rate": 9.998917241281646e-07, "loss": -0.0010270563652738929, "memory(GiB)": 87.24, "reward": 1.541666716337204, "reward_std": 0.20795341208577156, "rewards/CineAccuracyORM/mean": 0.5416666865348816, "rewards/CineAccuracyORM/std": 0.4700479060411453, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 112, "train_speed(iter/s)": 0.02843 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 93.5, "completions/mean_length": 56.89583396911621, "completions/min_length": 33.25, "epoch": 0.16827997021593447, "grad_norm": 1.972182008123473, "kl": 0.33349609375, "learning_rate": 9.998867467579233e-07, "loss": 0.017326433211565018, "memory(GiB)": 87.24, "reward": 1.5937500596046448, "reward_std": 0.2048080526292324, "rewards/CineAccuracyORM/mean": 0.5937500223517418, "rewards/CineAccuracyORM/std": 0.4674193859100342, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 113, "train_speed(iter/s)": 0.028397 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 80.25, "completions/mean_length": 54.77083492279053, "completions/min_length": 32.25, "epoch": 0.16976917349218168, "grad_norm": 2.1428110608895667, "kl": 0.376953125, "learning_rate": 9.998816575537442e-07, "loss": -0.02129409834742546, "memory(GiB)": 87.24, "reward": 1.572916716337204, "reward_std": 0.19570155069231987, "rewards/CineAccuracyORM/mean": 0.5729166828095913, "rewards/CineAccuracyORM/std": 0.4095207303762436, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 114, "train_speed(iter/s)": 0.02851 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 84.25, "completions/mean_length": 56.218750953674316, "completions/min_length": 36.75, "epoch": 0.1712583767684289, "grad_norm": 1.6770912774433737, "kl": 0.3818359375, "learning_rate": 9.998764565167658e-07, "loss": -0.016306228935718536, "memory(GiB)": 99.18, "reward": 1.4895834028720856, "reward_std": 0.10661446116864681, "rewards/CineAccuracyORM/mean": 0.4895833507180214, "rewards/CineAccuracyORM/std": 0.47723397612571716, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 115, "train_speed(iter/s)": 0.028494 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 84.75, "completions/mean_length": 56.11458396911621, "completions/min_length": 34.0, "epoch": 0.1727475800446761, "grad_norm": 1.9032994016916123, "kl": 0.390625, "learning_rate": 9.998711436481517e-07, "loss": -0.011009774170815945, "memory(GiB)": 99.18, "reward": 1.6354166865348816, "reward_std": 0.1897338479757309, "rewards/CineAccuracyORM/mean": 0.635416679084301, "rewards/CineAccuracyORM/std": 0.42770570516586304, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 116, "train_speed(iter/s)": 0.02853 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 92.0, "completions/mean_length": 55.593750953674316, "completions/min_length": 29.5, "epoch": 0.1742367833209233, "grad_norm": 1.6376601364164558, "kl": 0.43115234375, "learning_rate": 9.998657189490904e-07, "loss": 0.006230839528143406, "memory(GiB)": 99.18, "reward": 1.5312500596046448, "reward_std": 0.06803862750530243, "rewards/CineAccuracyORM/mean": 0.5312500149011612, "rewards/CineAccuracyORM/std": 0.4884019047021866, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 117, "train_speed(iter/s)": 0.028635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 85.5, "completions/mean_length": 54.10416793823242, "completions/min_length": 30.5, "epoch": 0.1757259865971705, "grad_norm": 1.9067749970950072, "kl": 0.4072265625, "learning_rate": 9.998601824207955e-07, "loss": 0.014284114353358746, "memory(GiB)": 99.18, "reward": 1.666666716337204, "reward_std": 0.16340987384319305, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.44682279229164124, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 118, "train_speed(iter/s)": 0.028682 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 85.5, "completions/mean_length": 53.166666984558105, "completions/min_length": 33.5, "epoch": 0.17721518987341772, "grad_norm": 2.219435847077353, "kl": 0.43701171875, "learning_rate": 9.998545340645058e-07, "loss": 0.0031664136331528425, "memory(GiB)": 99.18, "reward": 1.4270833730697632, "reward_std": 0.22830968163907528, "rewards/CineAccuracyORM/mean": 0.4270833469927311, "rewards/CineAccuracyORM/std": 0.4593241214752197, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 119, "train_speed(iter/s)": 0.028565 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 86.5, "completions/mean_length": 56.79166793823242, "completions/min_length": 36.5, "epoch": 0.17870439314966494, "grad_norm": 1.540225729202523, "kl": 0.3759765625, "learning_rate": 9.998487738814847e-07, "loss": -0.003639700822532177, "memory(GiB)": 99.18, "reward": 1.5625000298023224, "reward_std": 0.11116771958768368, "rewards/CineAccuracyORM/mean": 0.5625000074505806, "rewards/CineAccuracyORM/std": 0.481845498085022, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 120, "train_speed(iter/s)": 0.028605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 88.75, "completions/mean_length": 53.05208492279053, "completions/min_length": 30.0, "epoch": 0.18019359642591215, "grad_norm": 0.932577072568959, "kl": 0.4091796875, "learning_rate": 9.998429018730208e-07, "loss": -0.0018194153672084212, "memory(GiB)": 99.18, "reward": 1.729166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7291666865348816, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 121, "train_speed(iter/s)": 0.028693 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 96.25, "completions/mean_length": 59.41666793823242, "completions/min_length": 34.5, "epoch": 0.18168279970215934, "grad_norm": 1.924895640659707, "kl": 0.39990234375, "learning_rate": 9.99836918040428e-07, "loss": -0.008868570439517498, "memory(GiB)": 99.18, "reward": 1.666666716337204, "reward_std": 0.1497435588389635, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4147593714296818, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 122, "train_speed(iter/s)": 0.028725 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 92.0, "completions/mean_length": 61.85416793823242, "completions/min_length": 35.75, "epoch": 0.18317200297840655, "grad_norm": 1.831044368978185, "kl": 0.3603515625, "learning_rate": 9.99830822385045e-07, "loss": 0.008611178025603294, "memory(GiB)": 99.18, "reward": 1.5833334028720856, "reward_std": 0.1746530942618847, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4342890679836273, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 123, "train_speed(iter/s)": 0.028733 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 90.75, "completions/mean_length": 58.79166889190674, "completions/min_length": 34.75, "epoch": 0.18466120625465376, "grad_norm": 1.8675210636482253, "kl": 0.37841796875, "learning_rate": 9.998246149082356e-07, "loss": -0.014568671584129333, "memory(GiB)": 99.18, "reward": 1.5937500298023224, "reward_std": 0.12696419283747673, "rewards/CineAccuracyORM/mean": 0.5937500149011612, "rewards/CineAccuracyORM/std": 0.41868456825613976, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 124, "train_speed(iter/s)": 0.028833 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.5, "completions/mean_length": 58.89583396911621, "completions/min_length": 36.25, "epoch": 0.18615040953090098, "grad_norm": 0.9439505561057601, "kl": 0.369140625, "learning_rate": 9.998182956113883e-07, "loss": -0.0043252273462712765, "memory(GiB)": 99.18, "reward": 1.7708333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 125, "train_speed(iter/s)": 0.028825 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.25, "completions/mean_length": 63.26041889190674, "completions/min_length": 37.25, "epoch": 0.18763961280714817, "grad_norm": 1.188692263245282, "kl": 0.35498046875, "learning_rate": 9.99811864495917e-07, "loss": 0.00023556812084279954, "memory(GiB)": 99.18, "reward": 1.5625000596046448, "reward_std": 0.08625819534063339, "rewards/CineAccuracyORM/mean": 0.5625000149011612, "rewards/CineAccuracyORM/std": 0.32781141996383667, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 126, "train_speed(iter/s)": 0.028857 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 89.25, "completions/mean_length": 58.80208492279053, "completions/min_length": 35.0, "epoch": 0.18912881608339538, "grad_norm": 1.7736931179279012, "kl": 0.35888671875, "learning_rate": 9.998053215632603e-07, "loss": 0.009623806923627853, "memory(GiB)": 99.18, "reward": 1.854166716337204, "reward_std": 0.11116771958768368, "rewards/CineAccuracyORM/mean": 0.8541666865348816, "rewards/CineAccuracyORM/std": 0.32645734772086143, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 127, "train_speed(iter/s)": 0.028897 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 87.0, "completions/mean_length": 60.32291889190674, "completions/min_length": 38.25, "epoch": 0.1906180193596426, "grad_norm": 1.7633833579168785, "kl": 0.34716796875, "learning_rate": 9.997986668148819e-07, "loss": -0.013630621135234833, "memory(GiB)": 99.18, "reward": 1.5729166865348816, "reward_std": 0.13293188996613026, "rewards/CineAccuracyORM/mean": 0.5729166772216558, "rewards/CineAccuracyORM/std": 0.31434735655784607, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 128, "train_speed(iter/s)": 0.028912 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 98.5, "completions/mean_length": 61.645835876464844, "completions/min_length": 37.0, "epoch": 0.1921072226358898, "grad_norm": 1.5487264369679112, "kl": 0.3486328125, "learning_rate": 9.99791900252271e-07, "loss": -0.029364466667175293, "memory(GiB)": 99.18, "reward": 1.6458334028720856, "reward_std": 0.12483403459191322, "rewards/CineAccuracyORM/mean": 0.6458333507180214, "rewards/CineAccuracyORM/std": 0.44545699656009674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 129, "train_speed(iter/s)": 0.029007 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 89.25, "completions/mean_length": 60.166666984558105, "completions/min_length": 38.5, "epoch": 0.193596425912137, "grad_norm": 1.8646622218642939, "kl": 0.36962890625, "learning_rate": 9.99785021876941e-07, "loss": 0.015839826315641403, "memory(GiB)": 99.18, "reward": 1.5208333730697632, "reward_std": 0.1360772494226694, "rewards/CineAccuracyORM/mean": 0.5208333507180214, "rewards/CineAccuracyORM/std": 0.46312014013528824, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 130, "train_speed(iter/s)": 0.02897 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 95.75, "completions/mean_length": 61.60416793823242, "completions/min_length": 37.75, "epoch": 0.1950856291883842, "grad_norm": 1.6264853448617764, "kl": 0.37353515625, "learning_rate": 9.99778031690431e-07, "loss": -0.018355336040258408, "memory(GiB)": 99.18, "reward": 1.6666666865348816, "reward_std": 0.11713542230427265, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.45616617053747177, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 131, "train_speed(iter/s)": 0.028904 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 88.0, "completions/mean_length": 59.08333492279053, "completions/min_length": 33.25, "epoch": 0.19657483246463142, "grad_norm": 1.6868797362666559, "kl": 0.34423828125, "learning_rate": 9.997709296943045e-07, "loss": 0.004178968723863363, "memory(GiB)": 99.18, "reward": 1.7083333730697632, "reward_std": 0.11116772517561913, "rewards/CineAccuracyORM/mean": 0.7083333432674408, "rewards/CineAccuracyORM/std": 0.4619346410036087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 132, "train_speed(iter/s)": 0.028888 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 94.75, "completions/mean_length": 63.85416793823242, "completions/min_length": 37.0, "epoch": 0.19806403574087864, "grad_norm": 1.9889640750044424, "kl": 0.33056640625, "learning_rate": 9.997637158901507e-07, "loss": 0.02550620399415493, "memory(GiB)": 99.18, "reward": 1.7291666865348816, "reward_std": 0.1883193999528885, "rewards/CineAccuracyORM/mean": 0.7291666716337204, "rewards/CineAccuracyORM/std": 0.35865580290555954, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 133, "train_speed(iter/s)": 0.028979 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 93.25, "completions/mean_length": 64.01041793823242, "completions/min_length": 44.5, "epoch": 0.19955323901712585, "grad_norm": 1.693598814571839, "kl": 0.302734375, "learning_rate": 9.997563902795833e-07, "loss": 0.0008639941806904972, "memory(GiB)": 99.18, "reward": 1.5520834028720856, "reward_std": 0.14659819938242435, "rewards/CineAccuracyORM/mean": 0.5520833507180214, "rewards/CineAccuracyORM/std": 0.4674193859100342, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 134, "train_speed(iter/s)": 0.02907 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 94.0, "completions/mean_length": 64.12500286102295, "completions/min_length": 41.25, "epoch": 0.20104244229337304, "grad_norm": 1.3410733446494332, "kl": 0.35986328125, "learning_rate": 9.997489528642411e-07, "loss": 0.0097710732370615, "memory(GiB)": 99.18, "reward": 1.5312500298023224, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.5312500074505806, "rewards/CineAccuracyORM/std": 0.3582116588950157, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 135, "train_speed(iter/s)": 0.029104 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 89.75, "completions/mean_length": 64.03125190734863, "completions/min_length": 36.5, "epoch": 0.20253164556962025, "grad_norm": 1.1418243687580785, "kl": 0.333984375, "learning_rate": 9.997414036457882e-07, "loss": 0.006694551557302475, "memory(GiB)": 99.18, "reward": 1.7187500596046448, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.7187500149011612, "rewards/CineAccuracyORM/std": 0.4550659582018852, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 136, "train_speed(iter/s)": 0.02917 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 107.25, "completions/mean_length": 63.00000190734863, "completions/min_length": 36.75, "epoch": 0.20402084884586746, "grad_norm": 1.6544185329793792, "kl": 0.359375, "learning_rate": 9.997337426259132e-07, "loss": 0.021962782368063927, "memory(GiB)": 99.18, "reward": 1.7500000596046448, "reward_std": 0.17009328491985798, "rewards/CineAccuracyORM/mean": 0.7500000298023224, "rewards/CineAccuracyORM/std": 0.40787915512919426, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 137, "train_speed(iter/s)": 0.029084 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 86.5, "completions/mean_length": 60.56250190734863, "completions/min_length": 39.5, "epoch": 0.20551005212211468, "grad_norm": 2.254660884197474, "kl": 0.3798828125, "learning_rate": 9.997259698063305e-07, "loss": 0.01393861509859562, "memory(GiB)": 99.18, "reward": 1.8020833730697632, "reward_std": 0.23144850134849548, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.38600777834653854, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 138, "train_speed(iter/s)": 0.029067 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 92.5, "completions/mean_length": 62.031250953674316, "completions/min_length": 42.75, "epoch": 0.20699925539836186, "grad_norm": 1.191348933676723, "kl": 0.38525390625, "learning_rate": 9.997180851887784e-07, "loss": 0.0034970776177942753, "memory(GiB)": 99.18, "reward": 1.5520833730697632, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.552083358168602, "rewards/CineAccuracyORM/std": 0.2973194234073162, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 139, "train_speed(iter/s)": 0.029092 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 98.25, "completions/mean_length": 62.750000953674316, "completions/min_length": 39.0, "epoch": 0.20848845867460908, "grad_norm": 1.7224232984199275, "kl": 0.36865234375, "learning_rate": 9.997100887750215e-07, "loss": -0.0011196646373718977, "memory(GiB)": 99.18, "reward": 1.6458334028720856, "reward_std": 0.1034691073000431, "rewards/CineAccuracyORM/mean": 0.645833358168602, "rewards/CineAccuracyORM/std": 0.46184761822223663, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 140, "train_speed(iter/s)": 0.029175 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 107.25, "completions/mean_length": 61.25000190734863, "completions/min_length": 37.25, "epoch": 0.2099776619508563, "grad_norm": 2.2064880305321464, "kl": 0.39404296875, "learning_rate": 9.997019805668481e-07, "loss": -0.008071387186646461, "memory(GiB)": 99.18, "reward": 1.6458333730697632, "reward_std": 0.22233544662594795, "rewards/CineAccuracyORM/mean": 0.6458333432674408, "rewards/CineAccuracyORM/std": 0.4754209592938423, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 141, "train_speed(iter/s)": 0.029195 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 86.0, "completions/mean_length": 60.30208396911621, "completions/min_length": 41.0, "epoch": 0.2114668652271035, "grad_norm": 1.9973948326894966, "kl": 0.3662109375, "learning_rate": 9.996937605660725e-07, "loss": 0.010582007467746735, "memory(GiB)": 99.18, "reward": 1.6666666865348816, "reward_std": 0.15571126341819763, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3568698540329933, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 142, "train_speed(iter/s)": 0.029282 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 93.5, "completions/mean_length": 63.71875190734863, "completions/min_length": 40.75, "epoch": 0.21295606850335072, "grad_norm": 1.6649250081909606, "kl": 0.3759765625, "learning_rate": 9.996854287745335e-07, "loss": 0.017229584977030754, "memory(GiB)": 99.18, "reward": 1.604166716337204, "reward_std": 0.1178511306643486, "rewards/CineAccuracyORM/mean": 0.6041666939854622, "rewards/CineAccuracyORM/std": 0.47292453050613403, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 143, "train_speed(iter/s)": 0.029304 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 84.75, "completions/mean_length": 58.98958492279053, "completions/min_length": 38.0, "epoch": 0.2144452717795979, "grad_norm": 0.856356121142466, "kl": 0.38134765625, "learning_rate": 9.996769851940955e-07, "loss": -0.00436960905790329, "memory(GiB)": 99.18, "reward": 1.8437500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 144, "train_speed(iter/s)": 0.029392 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 84.0, "completions/mean_length": 58.000000953674316, "completions/min_length": 38.0, "epoch": 0.21593447505584512, "grad_norm": 1.4255874676473412, "kl": 0.38330078125, "learning_rate": 9.99668429826647e-07, "loss": -0.00981421023607254, "memory(GiB)": 99.18, "reward": 1.8333333432674408, "reward_std": 0.09750141017138958, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.17466487362980843, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 145, "train_speed(iter/s)": 0.029367 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 112.25, "completions/mean_length": 59.47916793823242, "completions/min_length": 35.0, "epoch": 0.21742367833209233, "grad_norm": 1.6708839783622853, "kl": 0.3974609375, "learning_rate": 9.996597626741021e-07, "loss": 0.01708851009607315, "memory(GiB)": 99.18, "reward": 1.5833333432674408, "reward_std": 0.1178511306643486, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.4869779273867607, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 146, "train_speed(iter/s)": 0.02937 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 85.75, "completions/mean_length": 58.34375, "completions/min_length": 35.75, "epoch": 0.21891288160833955, "grad_norm": 1.4159335399944857, "kl": 0.38427734375, "learning_rate": 9.996509837384e-07, "loss": 0.0033842052798718214, "memory(GiB)": 99.18, "reward": 1.5937500596046448, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.5937500223517418, "rewards/CineAccuracyORM/std": 0.4177052788436413, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 147, "train_speed(iter/s)": 0.029379 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 81.5, "completions/mean_length": 56.479166984558105, "completions/min_length": 37.25, "epoch": 0.22040208488458674, "grad_norm": 1.3143050106744474, "kl": 0.3974609375, "learning_rate": 9.996420930215045e-07, "loss": -0.013370170257985592, "memory(GiB)": 99.18, "reward": 1.6250000298023224, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.6250000074505806, "rewards/CineAccuracyORM/std": 0.3477324768900871, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 148, "train_speed(iter/s)": 0.029351 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 92.75, "completions/mean_length": 57.52083396911621, "completions/min_length": 36.0, "epoch": 0.22189128816083395, "grad_norm": 1.225412744678349, "kl": 0.40234375, "learning_rate": 9.99633090525405e-07, "loss": 0.011983871459960938, "memory(GiB)": 99.18, "reward": 1.5208333730697632, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.5208333507180214, "rewards/CineAccuracyORM/std": 0.48048195987939835, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 149, "train_speed(iter/s)": 0.029306 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 85.0, "completions/mean_length": 55.875000953674316, "completions/min_length": 32.5, "epoch": 0.22338049143708116, "grad_norm": 0.8772200881456304, "kl": 0.4404296875, "learning_rate": 9.99623976252115e-07, "loss": -0.00761718675494194, "memory(GiB)": 99.18, "reward": 1.5937500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500074505806, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 150, "train_speed(iter/s)": 0.029276 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 80.0, "completions/mean_length": 54.94791793823242, "completions/min_length": 33.25, "epoch": 0.22486969471332838, "grad_norm": 0.007053803952803251, "kl": 0.4375, "learning_rate": 9.996147502036738e-07, "loss": 0.00043704756535589695, "memory(GiB)": 99.18, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 151, "train_speed(iter/s)": 0.029259 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 81.75, "completions/mean_length": 56.65625190734863, "completions/min_length": 34.75, "epoch": 0.22635889798957556, "grad_norm": 1.790083953394064, "kl": 0.45947265625, "learning_rate": 9.996054123821453e-07, "loss": 0.01830073818564415, "memory(GiB)": 99.18, "reward": 1.6875000596046448, "reward_std": 0.11116771958768368, "rewards/CineAccuracyORM/mean": 0.6875000223517418, "rewards/CineAccuracyORM/std": 0.3955155275762081, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 152, "train_speed(iter/s)": 0.029285 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 98.25, "completions/mean_length": 55.50000190734863, "completions/min_length": 33.0, "epoch": 0.22784810126582278, "grad_norm": 0.9293392049285139, "kl": 0.41357421875, "learning_rate": 9.995959627896188e-07, "loss": -0.0061822920106351376, "memory(GiB)": 99.18, "reward": 1.5520834028720856, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.5520833507180214, "rewards/CineAccuracyORM/std": 0.464870348572731, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 153, "train_speed(iter/s)": 0.029308 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 81.0, "completions/mean_length": 54.85416889190674, "completions/min_length": 32.5, "epoch": 0.22933730454207, "grad_norm": 2.2411649200560704, "kl": 0.45458984375, "learning_rate": 9.995864014282082e-07, "loss": -0.012771841138601303, "memory(GiB)": 99.18, "reward": 1.729166716337204, "reward_std": 0.13151744566857815, "rewards/CineAccuracyORM/mean": 0.7291666865348816, "rewards/CineAccuracyORM/std": 0.44130611419677734, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 154, "train_speed(iter/s)": 0.029392 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 81.25, "completions/mean_length": 52.781250953674316, "completions/min_length": 35.0, "epoch": 0.2308265078183172, "grad_norm": 1.071456038215603, "kl": 0.4697265625, "learning_rate": 9.995767283000525e-07, "loss": -0.008930705487728119, "memory(GiB)": 99.18, "reward": 1.5937500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500102445483, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 155, "train_speed(iter/s)": 0.029351 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 79.75, "completions/mean_length": 54.69791889190674, "completions/min_length": 29.25, "epoch": 0.23231571109456442, "grad_norm": 1.1698251154830048, "kl": 0.43115234375, "learning_rate": 9.995669434073157e-07, "loss": -0.006110246293246746, "memory(GiB)": 99.18, "reward": 1.8020833432674408, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.20484384894371033, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 156, "train_speed(iter/s)": 0.029308 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 74.0, "completions/mean_length": 55.58333396911621, "completions/min_length": 33.0, "epoch": 0.2338049143708116, "grad_norm": 1.968143207942147, "kl": 0.421875, "learning_rate": 9.99557046752187e-07, "loss": 0.002367536537349224, "memory(GiB)": 99.18, "reward": 1.7083333730697632, "reward_std": 0.07715167850255966, "rewards/CineAccuracyORM/mean": 0.7083333432674408, "rewards/CineAccuracyORM/std": 0.3568698540329933, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 157, "train_speed(iter/s)": 0.029287 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 99.0, "completions/mean_length": 52.54166793823242, "completions/min_length": 25.25, "epoch": 0.23529411764705882, "grad_norm": 1.2301135916360373, "kl": 0.4208984375, "learning_rate": 9.995470383368808e-07, "loss": -0.021008988842368126, "memory(GiB)": 109.69, "reward": 1.447916716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.447916679084301, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 158, "train_speed(iter/s)": 0.029208 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 93.5, "completions/mean_length": 58.03125190734863, "completions/min_length": 32.75, "epoch": 0.23678332092330603, "grad_norm": 1.1926136995362964, "kl": 0.4228515625, "learning_rate": 9.995369181636353e-07, "loss": -0.005246642045676708, "memory(GiB)": 109.69, "reward": 1.6562500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500149011612, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 159, "train_speed(iter/s)": 0.029174 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.5, "completions/mean_length": 57.48958492279053, "completions/min_length": 34.0, "epoch": 0.23827252419955325, "grad_norm": 0.007956784277271553, "kl": 0.419921875, "learning_rate": 9.995266862347153e-07, "loss": 0.00041937408968806267, "memory(GiB)": 109.69, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 160, "train_speed(iter/s)": 0.029208 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 82.5, "completions/mean_length": 52.125000953674316, "completions/min_length": 28.0, "epoch": 0.23976172747580043, "grad_norm": 0.007416042902024405, "kl": 0.42626953125, "learning_rate": 9.995163425524096e-07, "loss": 0.00042625851347111166, "memory(GiB)": 109.69, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 161, "train_speed(iter/s)": 0.029254 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 87.75, "completions/mean_length": 56.60416793823242, "completions/min_length": 32.75, "epoch": 0.24125093075204765, "grad_norm": 0.007128674127734175, "kl": 0.40771484375, "learning_rate": 9.995058871190325e-07, "loss": 0.0004076409968547523, "memory(GiB)": 109.69, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 162, "train_speed(iter/s)": 0.029327 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.75, "completions/mean_length": 60.61458492279053, "completions/min_length": 31.0, "epoch": 0.24274013402829486, "grad_norm": 1.2376372055613418, "kl": 0.400390625, "learning_rate": 9.994953199369226e-07, "loss": 0.002402066020295024, "memory(GiB)": 109.69, "reward": 1.7291666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7291666716337204, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 163, "train_speed(iter/s)": 0.029344 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.5, "completions/mean_length": 59.77083492279053, "completions/min_length": 29.0, "epoch": 0.24422933730454208, "grad_norm": 1.514997037908798, "kl": 0.4052734375, "learning_rate": 9.994846410084445e-07, "loss": -0.009616002440452576, "memory(GiB)": 109.69, "reward": 1.729166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7291666865348816, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 164, "train_speed(iter/s)": 0.029361 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.0, "completions/mean_length": 63.54166889190674, "completions/min_length": 35.0, "epoch": 0.2457185405807893, "grad_norm": 2.197425174230845, "kl": 0.39501953125, "learning_rate": 9.99473850335987e-07, "loss": -0.0029207556508481503, "memory(GiB)": 109.69, "reward": 1.6458333432674408, "reward_std": 0.11116771958768368, "rewards/CineAccuracyORM/mean": 0.645833333954215, "rewards/CineAccuracyORM/std": 0.27867429703474045, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 165, "train_speed(iter/s)": 0.029331 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.75, "completions/mean_length": 62.25000286102295, "completions/min_length": 35.0, "epoch": 0.24720774385703648, "grad_norm": 0.6941856332855786, "kl": 0.40185546875, "learning_rate": 9.99462947921964e-07, "loss": 0.012342331930994987, "memory(GiB)": 109.69, "reward": 1.5937500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500223517418, "rewards/CineAccuracyORM/std": 0.47723397612571716, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 166, "train_speed(iter/s)": 0.029328 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 87.25, "completions/mean_length": 54.45833492279053, "completions/min_length": 28.0, "epoch": 0.2486969471332837, "grad_norm": 1.8410862058816813, "kl": 0.44580078125, "learning_rate": 9.994519337688151e-07, "loss": 0.015646085143089294, "memory(GiB)": 109.69, "reward": 1.7187500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500149011612, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 167, "train_speed(iter/s)": 0.029344 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 273.5, "completions/mean_length": 66.87500190734863, "completions/min_length": 35.75, "epoch": 0.2501861504095309, "grad_norm": 1.4728482530574585, "kl": 0.38818359375, "learning_rate": 9.99440807879004e-07, "loss": 0.006760397460311651, "memory(GiB)": 109.69, "reward": 1.5208333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.5208333432674408, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 168, "train_speed(iter/s)": 0.02929 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.5, "completions/mean_length": 56.96875190734863, "completions/min_length": 33.5, "epoch": 0.2516753536857781, "grad_norm": 1.1407766879505319, "kl": 0.4150390625, "learning_rate": 9.994295702550196e-07, "loss": 0.030566833913326263, "memory(GiB)": 109.69, "reward": 1.6562500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 0.9895833432674408, "rewards/Format/std": 0.05103103443980217, "step": 169, "train_speed(iter/s)": 0.029306 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 87.0, "completions/mean_length": 55.80208396911621, "completions/min_length": 33.25, "epoch": 0.25316455696202533, "grad_norm": 2.3297762828460633, "kl": 0.43994140625, "learning_rate": 9.994182208993763e-07, "loss": 0.015243150293827057, "memory(GiB)": 109.69, "reward": 1.6770833730697632, "reward_std": 0.06803862750530243, "rewards/CineAccuracyORM/mean": 0.6770833507180214, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 170, "train_speed(iter/s)": 0.029277 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 84.75, "completions/mean_length": 53.750000953674316, "completions/min_length": 25.0, "epoch": 0.2546537602382725, "grad_norm": 1.6068850641426449, "kl": 0.46484375, "learning_rate": 9.994067598146132e-07, "loss": -0.001693053520284593, "memory(GiB)": 109.69, "reward": 1.6458333730697632, "reward_std": 0.08311938121914864, "rewards/CineAccuracyORM/mean": 0.6458333395421505, "rewards/CineAccuracyORM/std": 0.2210759073495865, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 171, "train_speed(iter/s)": 0.029304 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 88.25, "completions/mean_length": 46.44791793823242, "completions/min_length": 21.5, "epoch": 0.2561429635145197, "grad_norm": 4.415100539421495, "kl": 0.53076171875, "learning_rate": 9.993951870032942e-07, "loss": 0.010592679493129253, "memory(GiB)": 109.69, "reward": 1.7291666865348816, "reward_std": 0.1753452718257904, "rewards/CineAccuracyORM/mean": 0.7291666716337204, "rewards/CineAccuracyORM/std": 0.3676535338163376, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 172, "train_speed(iter/s)": 0.029326 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 83.0, "completions/mean_length": 51.11458492279053, "completions/min_length": 26.25, "epoch": 0.25763216679076695, "grad_norm": 0.8407147430841606, "kl": 0.484375, "learning_rate": 9.993835024680081e-07, "loss": -0.008178580552339554, "memory(GiB)": 109.69, "reward": 1.4895833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.4895833432674408, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 173, "train_speed(iter/s)": 0.029378 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.0, "completions/mean_length": 50.04166793823242, "completions/min_length": 22.25, "epoch": 0.25912137006701413, "grad_norm": 2.030344020276861, "kl": 0.5556640625, "learning_rate": 9.993717062113696e-07, "loss": -0.01910308375954628, "memory(GiB)": 109.69, "reward": 1.6875000596046448, "reward_std": 0.08311938121914864, "rewards/CineAccuracyORM/mean": 0.6875000111758709, "rewards/CineAccuracyORM/std": 0.28614169359207153, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 174, "train_speed(iter/s)": 0.029344 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 108.75, "completions/mean_length": 52.531250953674316, "completions/min_length": 26.75, "epoch": 0.2606105733432614, "grad_norm": 0.9653803428998041, "kl": 0.51123046875, "learning_rate": 9.99359798236017e-07, "loss": 0.0018879568669945002, "memory(GiB)": 109.69, "reward": 1.4895833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.4895833432674408, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 175, "train_speed(iter/s)": 0.02931 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 84.75, "completions/mean_length": 52.29166889190674, "completions/min_length": 27.75, "epoch": 0.26209977661950856, "grad_norm": 2.7454196159808117, "kl": 0.50244140625, "learning_rate": 9.993477785446149e-07, "loss": 0.005568271037191153, "memory(GiB)": 109.69, "reward": 1.5, "reward_std": 0.14204495213925838, "rewards/CineAccuracyORM/mean": 0.5, "rewards/CineAccuracyORM/std": 0.3357803151011467, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 176, "train_speed(iter/s)": 0.029335 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 79.25, "completions/mean_length": 48.562500953674316, "completions/min_length": 26.25, "epoch": 0.26358897989575575, "grad_norm": 2.2819447146758707, "kl": 0.515625, "learning_rate": 9.99335647139852e-07, "loss": -0.016664553433656693, "memory(GiB)": 109.69, "reward": 1.5208333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.5208333432674408, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 177, "train_speed(iter/s)": 0.029358 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 77.25, "completions/mean_length": 51.875000953674316, "completions/min_length": 29.5, "epoch": 0.265078183172003, "grad_norm": 2.5195332547264586, "kl": 0.61767578125, "learning_rate": 9.993234040244425e-07, "loss": -0.015696339309215546, "memory(GiB)": 109.69, "reward": 1.6875000298023224, "reward_std": 0.12766291946172714, "rewards/CineAccuracyORM/mean": 0.6875000149011612, "rewards/CineAccuracyORM/std": 0.3261406943202019, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 178, "train_speed(iter/s)": 0.029333 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 79.5, "completions/mean_length": 55.69791793823242, "completions/min_length": 36.5, "epoch": 0.2665673864482502, "grad_norm": 1.819722883129649, "kl": 0.41015625, "learning_rate": 9.993110492011255e-07, "loss": 0.004803737625479698, "memory(GiB)": 109.69, "reward": 1.7395834028720856, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.739583358168602, "rewards/CineAccuracyORM/std": 0.4121886007487774, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 179, "train_speed(iter/s)": 0.029401 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 80.0, "completions/mean_length": 57.312500953674316, "completions/min_length": 32.25, "epoch": 0.2680565897244974, "grad_norm": 0.006030546757475314, "kl": 0.39404296875, "learning_rate": 9.992985826726645e-07, "loss": 0.00039477949030697346, "memory(GiB)": 109.69, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 180, "train_speed(iter/s)": 0.029471 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 90.0, "completions/mean_length": 57.92708492279053, "completions/min_length": 35.5, "epoch": 0.2695457930007446, "grad_norm": 1.053171990786447, "kl": 0.4228515625, "learning_rate": 9.992860044418492e-07, "loss": 0.0013477893080562353, "memory(GiB)": 109.69, "reward": 1.7291666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.729166679084301, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 181, "train_speed(iter/s)": 0.02945 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 85.0, "completions/mean_length": 57.031250953674316, "completions/min_length": 35.75, "epoch": 0.2710349962769918, "grad_norm": 1.8623437961868468, "kl": 0.4228515625, "learning_rate": 9.99273314511493e-07, "loss": -0.0029287091456353664, "memory(GiB)": 109.69, "reward": 1.6145834028720856, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6145833507180214, "rewards/CineAccuracyORM/std": 0.4884019047021866, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 182, "train_speed(iter/s)": 0.029487 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 87.0, "completions/mean_length": 59.83333492279053, "completions/min_length": 38.25, "epoch": 0.27252419955323903, "grad_norm": 1.6760506591936501, "kl": 0.38671875, "learning_rate": 9.992605128844352e-07, "loss": -0.002643726533278823, "memory(GiB)": 109.69, "reward": 1.6354166865348816, "reward_std": 0.07400632463395596, "rewards/CineAccuracyORM/mean": 0.635416679084301, "rewards/CineAccuracyORM/std": 0.3641507476568222, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 183, "train_speed(iter/s)": 0.029517 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 83.5, "completions/mean_length": 56.78125190734863, "completions/min_length": 35.0, "epoch": 0.2740134028294862, "grad_norm": 0.006799407081258974, "kl": 0.3955078125, "learning_rate": 9.992475995635396e-07, "loss": 0.00039517099503427744, "memory(GiB)": 109.69, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 184, "train_speed(iter/s)": 0.029584 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 82.0, "completions/mean_length": 57.20833492279053, "completions/min_length": 37.75, "epoch": 0.2755026061057334, "grad_norm": 0.007525253421530514, "kl": 0.396484375, "learning_rate": 9.992345745516952e-07, "loss": 0.000396231422200799, "memory(GiB)": 109.69, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 185, "train_speed(iter/s)": 0.029651 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 84.25, "completions/mean_length": 57.57291793823242, "completions/min_length": 32.5, "epoch": 0.27699180938198065, "grad_norm": 0.007632988612270025, "kl": 0.404296875, "learning_rate": 9.99221437851816e-07, "loss": 0.0004041033098474145, "memory(GiB)": 109.69, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 186, "train_speed(iter/s)": 0.029671 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 85.5, "completions/mean_length": 57.19791793823242, "completions/min_length": 41.75, "epoch": 0.27848101265822783, "grad_norm": 1.4201279191895921, "kl": 0.431640625, "learning_rate": 9.99208189466841e-07, "loss": -0.00423784926533699, "memory(GiB)": 109.69, "reward": 1.5625000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.5625000223517418, "rewards/CineAccuracyORM/std": 0.4870600998401642, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 187, "train_speed(iter/s)": 0.029647 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 83.5, "completions/mean_length": 57.83333396911621, "completions/min_length": 37.5, "epoch": 0.2799702159344751, "grad_norm": 1.8378725272567629, "kl": 0.4228515625, "learning_rate": 9.991948293997338e-07, "loss": 0.0016530448338016868, "memory(GiB)": 109.69, "reward": 1.5937500596046448, "reward_std": 0.0876726396381855, "rewards/CineAccuracyORM/mean": 0.5937500149011612, "rewards/CineAccuracyORM/std": 0.4957045316696167, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 188, "train_speed(iter/s)": 0.029587 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 82.25, "completions/mean_length": 58.53125190734863, "completions/min_length": 34.75, "epoch": 0.28145941921072226, "grad_norm": 0.007065435557043247, "kl": 0.3984375, "learning_rate": 9.991813576534835e-07, "loss": 0.00039819508674554527, "memory(GiB)": 109.69, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 189, "train_speed(iter/s)": 0.029625 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 87.75, "completions/mean_length": 60.187500953674316, "completions/min_length": 39.5, "epoch": 0.28294862248696945, "grad_norm": 2.3656185430943806, "kl": 0.3896484375, "learning_rate": 9.99167774231104e-07, "loss": -0.01003494393080473, "memory(GiB)": 109.69, "reward": 1.604166716337204, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.6041666865348816, "rewards/CineAccuracyORM/std": 0.4190470837056637, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 190, "train_speed(iter/s)": 0.029603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 84.5, "completions/mean_length": 60.80208396911621, "completions/min_length": 38.25, "epoch": 0.2844378257632167, "grad_norm": 2.301566823122662, "kl": 0.392578125, "learning_rate": 9.991540791356342e-07, "loss": 0.0009147102828137577, "memory(GiB)": 109.69, "reward": 1.6666666865348816, "reward_std": 0.14518376626074314, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.34605155140161514, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 191, "train_speed(iter/s)": 0.029584 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 77.75, "completions/mean_length": 56.58333492279053, "completions/min_length": 36.75, "epoch": 0.2859270290394639, "grad_norm": 2.889241104968077, "kl": 0.43701171875, "learning_rate": 9.991402723701376e-07, "loss": 0.010249637067317963, "memory(GiB)": 109.69, "reward": 1.6875000298023224, "reward_std": 0.2086691353470087, "rewards/CineAccuracyORM/mean": 0.6875000298023224, "rewards/CineAccuracyORM/std": 0.42069846019148827, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 192, "train_speed(iter/s)": 0.029565 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 88.0, "completions/mean_length": 61.08333492279053, "completions/min_length": 41.5, "epoch": 0.2874162323157111, "grad_norm": 1.2375570091903922, "kl": 0.39697265625, "learning_rate": 9.991263539377035e-07, "loss": -0.007811783812940121, "memory(GiB)": 109.69, "reward": 1.6770833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 193, "train_speed(iter/s)": 0.029579 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 88.0, "completions/mean_length": 59.32291793823242, "completions/min_length": 38.5, "epoch": 0.2889054355919583, "grad_norm": 2.4669132488325345, "kl": 0.4150390625, "learning_rate": 9.991123238414453e-07, "loss": -0.007216823752969503, "memory(GiB)": 109.69, "reward": 1.4687500298023224, "reward_std": 0.10205466859042645, "rewards/CineAccuracyORM/mean": 0.4687500074505806, "rewards/CineAccuracyORM/std": 0.4920940324664116, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 194, "train_speed(iter/s)": 0.02953 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 90.25, "completions/mean_length": 58.63541889190674, "completions/min_length": 38.75, "epoch": 0.2903946388682055, "grad_norm": 0.007222659860440882, "kl": 0.39794921875, "learning_rate": 9.990981820845024e-07, "loss": 0.00039692793507128954, "memory(GiB)": 109.69, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 195, "train_speed(iter/s)": 0.029553 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 91.5, "completions/mean_length": 61.10416793823242, "completions/min_length": 35.5, "epoch": 0.29188384214445273, "grad_norm": 1.0832778020316962, "kl": 0.3955078125, "learning_rate": 9.990839286700378e-07, "loss": 0.00798419676721096, "memory(GiB)": 109.69, "reward": 1.6562500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500149011612, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 196, "train_speed(iter/s)": 0.029527 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 84.75, "completions/mean_length": 58.406250953674316, "completions/min_length": 35.75, "epoch": 0.2933730454206999, "grad_norm": 1.129616636941845, "kl": 0.41845703125, "learning_rate": 9.990695636012408e-07, "loss": 0.0006763365818187594, "memory(GiB)": 109.69, "reward": 1.6041666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6041666716337204, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 197, "train_speed(iter/s)": 0.029537 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 87.0, "completions/mean_length": 59.322916984558105, "completions/min_length": 39.0, "epoch": 0.29486224869694716, "grad_norm": 1.4383936335610068, "kl": 0.39111328125, "learning_rate": 9.99055086881325e-07, "loss": 0.0056395819410681725, "memory(GiB)": 109.69, "reward": 1.4062500596046448, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.40625001303851604, "rewards/CineAccuracyORM/std": 0.43859851360321045, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 198, "train_speed(iter/s)": 0.02947 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 83.5, "completions/mean_length": 57.708335876464844, "completions/min_length": 36.25, "epoch": 0.29635145197319435, "grad_norm": 2.5951983595119654, "kl": 0.408203125, "learning_rate": 9.99040498513529e-07, "loss": -6.911903619766235e-05, "memory(GiB)": 109.69, "reward": 1.7291666865348816, "reward_std": 0.15642697550356388, "rewards/CineAccuracyORM/mean": 0.7291666716337204, "rewards/CineAccuracyORM/std": 0.3676535338163376, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 199, "train_speed(iter/s)": 0.029456 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 89.75, "completions/mean_length": 64.19791889190674, "completions/min_length": 45.5, "epoch": 0.29784065524944153, "grad_norm": 1.444072749453705, "kl": 0.39453125, "learning_rate": 9.990257985011166e-07, "loss": 0.016571206972002983, "memory(GiB)": 109.69, "reward": 1.6666666865348816, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.3676535338163376, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 200, "train_speed(iter/s)": 0.029474 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 94.0, "completions/mean_length": 57.20833396911621, "completions/min_length": 35.5, "epoch": 0.2993298585256888, "grad_norm": 2.1079838759973377, "kl": 0.3994140625, "learning_rate": 9.990109868473763e-07, "loss": 0.0006911167874932289, "memory(GiB)": 109.69, "reward": 1.635416716337204, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.6354166772216558, "rewards/CineAccuracyORM/std": 0.2946811020374298, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 201, "train_speed(iter/s)": 0.029536 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 85.75, "completions/mean_length": 58.11458492279053, "completions/min_length": 36.5, "epoch": 0.30081906180193596, "grad_norm": 2.3024161305442092, "kl": 0.4140625, "learning_rate": 9.98996063555622e-07, "loss": -0.0016696762759238482, "memory(GiB)": 109.69, "reward": 1.7708333432674408, "reward_std": 0.08625819534063339, "rewards/CineAccuracyORM/mean": 0.7708333358168602, "rewards/CineAccuracyORM/std": 0.2893018424510956, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 202, "train_speed(iter/s)": 0.029541 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 80.75, "completions/mean_length": 56.093750953674316, "completions/min_length": 37.0, "epoch": 0.30230826507818315, "grad_norm": 1.0214028574396556, "kl": 0.44384765625, "learning_rate": 9.989810286291923e-07, "loss": -0.0025676307268440723, "memory(GiB)": 109.69, "reward": 1.5937500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500223517418, "rewards/CineAccuracyORM/std": 0.47723397612571716, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 203, "train_speed(iter/s)": 0.02948 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 78.5, "completions/mean_length": 57.260416984558105, "completions/min_length": 37.0, "epoch": 0.3037974683544304, "grad_norm": 2.885347312566537, "kl": 0.3876953125, "learning_rate": 9.989658820714504e-07, "loss": 0.016919072717428207, "memory(GiB)": 109.69, "reward": 1.6354166865348816, "reward_std": 0.102054663002491, "rewards/CineAccuracyORM/mean": 0.635416679084301, "rewards/CineAccuracyORM/std": 0.36695458739995956, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 204, "train_speed(iter/s)": 0.029502 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 83.0, "completions/mean_length": 57.34375, "completions/min_length": 40.0, "epoch": 0.3052866716306776, "grad_norm": 2.019084851771069, "kl": 0.41064453125, "learning_rate": 9.989506238857851e-07, "loss": 0.002015251200646162, "memory(GiB)": 109.69, "reward": 1.729166716337204, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.7291666865348816, "rewards/CineAccuracyORM/std": 0.4154365845024586, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 205, "train_speed(iter/s)": 0.029522 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 72.25, "completions/mean_length": 53.60416889190674, "completions/min_length": 35.0, "epoch": 0.3067758749069248, "grad_norm": 1.3463188899377074, "kl": 0.40478515625, "learning_rate": 9.989352540756102e-07, "loss": 0.0005918305250816047, "memory(GiB)": 109.69, "reward": 1.8229166865348816, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.1942163035273552, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 206, "train_speed(iter/s)": 0.029543 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 85.0, "completions/mean_length": 55.593750953674316, "completions/min_length": 37.25, "epoch": 0.308265078183172, "grad_norm": 1.3820146022352862, "kl": 0.42822265625, "learning_rate": 9.98919772644364e-07, "loss": 0.005964584648609161, "memory(GiB)": 109.69, "reward": 1.6041666865348816, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.604166679084301, "rewards/CineAccuracyORM/std": 0.3600961044430733, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 207, "train_speed(iter/s)": 0.029525 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 76.75, "completions/mean_length": 56.71875190734863, "completions/min_length": 41.25, "epoch": 0.3097542814594192, "grad_norm": 1.995778830520487, "kl": 0.43896484375, "learning_rate": 9.989041795955098e-07, "loss": -0.01171582005918026, "memory(GiB)": 109.69, "reward": 1.447916716337204, "reward_std": 0.06803862750530243, "rewards/CineAccuracyORM/mean": 0.447916679084301, "rewards/CineAccuracyORM/std": 0.4456155598163605, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 208, "train_speed(iter/s)": 0.029502 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 82.75, "completions/mean_length": 55.010416984558105, "completions/min_length": 38.0, "epoch": 0.31124348473566643, "grad_norm": 0.00659239673827746, "kl": 0.45166015625, "learning_rate": 9.988884749325365e-07, "loss": 0.0004514223837759346, "memory(GiB)": 109.69, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 209, "train_speed(iter/s)": 0.029522 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 71.5, "completions/mean_length": 51.80208396911621, "completions/min_length": 33.25, "epoch": 0.3127326880119136, "grad_norm": 0.007201697774664056, "kl": 0.39697265625, "learning_rate": 9.98872658658957e-07, "loss": 0.00039730509161017835, "memory(GiB)": 109.69, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 210, "train_speed(iter/s)": 0.029544 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 82.75, "completions/mean_length": 52.781250953674316, "completions/min_length": 34.0, "epoch": 0.31422189128816086, "grad_norm": 0.007444519789582561, "kl": 0.44677734375, "learning_rate": 9.9885673077831e-07, "loss": 0.0004469463601708412, "memory(GiB)": 109.69, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 211, "train_speed(iter/s)": 0.029568 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 83.0, "completions/mean_length": 53.156250953674316, "completions/min_length": 32.0, "epoch": 0.31571109456440805, "grad_norm": 2.782361534247127, "kl": 0.45654296875, "learning_rate": 9.988406912941589e-07, "loss": -0.022322725504636765, "memory(GiB)": 109.69, "reward": 1.5729166865348816, "reward_std": 0.15115800313651562, "rewards/CineAccuracyORM/mean": 0.5729166716337204, "rewards/CineAccuracyORM/std": 0.43248625099658966, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 212, "train_speed(iter/s)": 0.029567 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 76.0, "completions/mean_length": 52.22916793823242, "completions/min_length": 33.75, "epoch": 0.31720029784065523, "grad_norm": 0.007409908918089866, "kl": 0.4345703125, "learning_rate": 9.988245402100919e-07, "loss": 0.00043386773904785514, "memory(GiB)": 109.69, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 213, "train_speed(iter/s)": 0.029562 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 74.5, "completions/mean_length": 50.812500953674316, "completions/min_length": 29.75, "epoch": 0.3186895011169025, "grad_norm": 2.3954256022172733, "kl": 0.45361328125, "learning_rate": 9.988082775297223e-07, "loss": 0.010276957415044308, "memory(GiB)": 109.69, "reward": 1.447916716337204, "reward_std": 0.12624847888946533, "rewards/CineAccuracyORM/mean": 0.447916679084301, "rewards/CineAccuracyORM/std": 0.4845366030931473, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 214, "train_speed(iter/s)": 0.029536 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 76.75, "completions/mean_length": 54.51041793823242, "completions/min_length": 33.5, "epoch": 0.32017870439314966, "grad_norm": 1.4618618858789205, "kl": 0.4658203125, "learning_rate": 9.987919032566883e-07, "loss": 0.004863282665610313, "memory(GiB)": 109.69, "reward": 1.6458333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6458333507180214, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 215, "train_speed(iter/s)": 0.029519 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 86.25, "completions/mean_length": 56.20833492279053, "completions/min_length": 34.5, "epoch": 0.32166790766939685, "grad_norm": 0.8296047728008038, "kl": 0.44287109375, "learning_rate": 9.987754173946533e-07, "loss": -0.010409131646156311, "memory(GiB)": 109.69, "reward": 1.572916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.572916679084301, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 216, "train_speed(iter/s)": 0.029498 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 81.25, "completions/mean_length": 54.86458492279053, "completions/min_length": 36.0, "epoch": 0.3231571109456441, "grad_norm": 1.8652064123504062, "kl": 0.44384765625, "learning_rate": 9.987588199473056e-07, "loss": 0.01006898283958435, "memory(GiB)": 109.69, "reward": 1.4375000596046448, "reward_std": 0.09750140458345413, "rewards/CineAccuracyORM/mean": 0.4375000149011612, "rewards/CineAccuracyORM/std": 0.48599863797426224, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 217, "train_speed(iter/s)": 0.029555 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 79.75, "completions/mean_length": 52.281250953674316, "completions/min_length": 35.5, "epoch": 0.3246463142218913, "grad_norm": 2.956583359569392, "kl": 0.470703125, "learning_rate": 9.98742110918358e-07, "loss": -0.002018975093960762, "memory(GiB)": 109.69, "reward": 1.3333333730697632, "reward_std": 0.19428710639476776, "rewards/CineAccuracyORM/mean": 0.3333333469927311, "rewards/CineAccuracyORM/std": 0.4160936325788498, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 218, "train_speed(iter/s)": 0.029466 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 79.5, "completions/mean_length": 54.00000190734863, "completions/min_length": 32.5, "epoch": 0.3261355174981385, "grad_norm": 1.7299194831571933, "kl": 0.4296875, "learning_rate": 9.98725290311549e-07, "loss": 0.002762551885098219, "memory(GiB)": 109.69, "reward": 1.510416716337204, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.5104166846722364, "rewards/CineAccuracyORM/std": 0.36238520964980125, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 219, "train_speed(iter/s)": 0.029447 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 80.75, "completions/mean_length": 55.05208396911621, "completions/min_length": 33.75, "epoch": 0.3276247207743857, "grad_norm": 0.7543994663056502, "kl": 0.3876953125, "learning_rate": 9.987083581306412e-07, "loss": -0.001513495109975338, "memory(GiB)": 109.69, "reward": 1.729166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.729166679084301, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 220, "train_speed(iter/s)": 0.029504 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 78.0, "completions/mean_length": 54.520835876464844, "completions/min_length": 33.0, "epoch": 0.3291139240506329, "grad_norm": 2.0367237110557435, "kl": 0.4501953125, "learning_rate": 9.98691314379423e-07, "loss": -0.007890007458627224, "memory(GiB)": 109.69, "reward": 1.7812500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7812500149011612, "rewards/CineAccuracyORM/std": 0.34448449313640594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 221, "train_speed(iter/s)": 0.02952 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 87.25, "completions/mean_length": 55.48958492279053, "completions/min_length": 32.0, "epoch": 0.33060312732688013, "grad_norm": 1.0327210463932142, "kl": 0.5888671875, "learning_rate": 9.986741590617074e-07, "loss": 0.010616863146424294, "memory(GiB)": 109.69, "reward": 1.5833333730697632, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.48803938925266266, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 222, "train_speed(iter/s)": 0.029501 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 79.25, "completions/mean_length": 53.45833492279053, "completions/min_length": 33.0, "epoch": 0.3320923306031273, "grad_norm": 0.007091834561918049, "kl": 0.4248046875, "learning_rate": 9.986568921813324e-07, "loss": 0.00042471010237932205, "memory(GiB)": 109.69, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 223, "train_speed(iter/s)": 0.029524 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 94.75, "completions/mean_length": 56.687500953674316, "completions/min_length": 33.5, "epoch": 0.33358153387937456, "grad_norm": 1.5984520704058858, "kl": 0.41845703125, "learning_rate": 9.986395137421607e-07, "loss": -0.009346762672066689, "memory(GiB)": 109.69, "reward": 1.791666716337204, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7916666865348816, "rewards/CineAccuracyORM/std": 0.33594508469104767, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 224, "train_speed(iter/s)": 0.029554 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 76.75, "completions/mean_length": 54.21875190734863, "completions/min_length": 36.25, "epoch": 0.33507073715562175, "grad_norm": 1.0958750203000833, "kl": 0.4375, "learning_rate": 9.986220237480802e-07, "loss": 0.001192919909954071, "memory(GiB)": 109.69, "reward": 1.6562500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500223517418, "rewards/CineAccuracyORM/std": 0.4121886007487774, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 225, "train_speed(iter/s)": 0.029573 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 92.75, "completions/mean_length": 57.312500953674316, "completions/min_length": 35.75, "epoch": 0.33655994043186893, "grad_norm": 1.6238033830781342, "kl": 0.41162109375, "learning_rate": 9.98604422203004e-07, "loss": 0.008282620459794998, "memory(GiB)": 109.69, "reward": 1.7083333730697632, "reward_std": 0.0725918859243393, "rewards/CineAccuracyORM/mean": 0.7083333432674408, "rewards/CineAccuracyORM/std": 0.36846019327640533, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 226, "train_speed(iter/s)": 0.029587 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 84.0, "completions/mean_length": 56.76041793823242, "completions/min_length": 37.0, "epoch": 0.3380491437081162, "grad_norm": 0.9883968180120073, "kl": 0.40234375, "learning_rate": 9.985867091108694e-07, "loss": -0.009179871529340744, "memory(GiB)": 109.69, "reward": 1.4062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.4062500074505806, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 227, "train_speed(iter/s)": 0.029601 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 85.75, "completions/mean_length": 56.281250953674316, "completions/min_length": 37.0, "epoch": 0.33953834698436336, "grad_norm": 0.006807546225808994, "kl": 0.39111328125, "learning_rate": 9.9856888447564e-07, "loss": 0.00039111304795369506, "memory(GiB)": 109.69, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 228, "train_speed(iter/s)": 0.029655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 88.5, "completions/mean_length": 59.79166793823242, "completions/min_length": 38.25, "epoch": 0.34102755026061055, "grad_norm": 2.020500147714603, "kl": 0.43994140625, "learning_rate": 9.985509483013025e-07, "loss": 0.009794961661100388, "memory(GiB)": 109.69, "reward": 1.791666716337204, "reward_std": 0.09750141017138958, "rewards/CineAccuracyORM/mean": 0.7916666865348816, "rewards/CineAccuracyORM/std": 0.2986612282693386, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 229, "train_speed(iter/s)": 0.029615 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 88.5, "completions/mean_length": 60.23958492279053, "completions/min_length": 38.0, "epoch": 0.3425167535368578, "grad_norm": 0.006444334137468479, "kl": 0.3720703125, "learning_rate": 9.985329005918702e-07, "loss": 0.0003719486703630537, "memory(GiB)": 109.69, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 230, "train_speed(iter/s)": 0.029634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 87.0, "completions/mean_length": 62.12500190734863, "completions/min_length": 42.25, "epoch": 0.344005956813105, "grad_norm": 1.0424711006969785, "kl": 0.39453125, "learning_rate": 9.985147413513805e-07, "loss": -0.006478919647634029, "memory(GiB)": 109.69, "reward": 1.6458333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6458333432674408, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 231, "train_speed(iter/s)": 0.029615 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 79.25, "completions/mean_length": 57.937500953674316, "completions/min_length": 41.0, "epoch": 0.3454951600893522, "grad_norm": 0.006977656312430943, "kl": 0.4111328125, "learning_rate": 9.98496470583896e-07, "loss": 0.0004113441682420671, "memory(GiB)": 109.69, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 232, "train_speed(iter/s)": 0.029598 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 94.5, "completions/mean_length": 58.21875286102295, "completions/min_length": 35.25, "epoch": 0.3469843633655994, "grad_norm": 0.006671804448737725, "kl": 0.3818359375, "learning_rate": 9.984780882935043e-07, "loss": 0.00038174755172804, "memory(GiB)": 109.69, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 233, "train_speed(iter/s)": 0.029617 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 95.5, "completions/mean_length": 62.26041889190674, "completions/min_length": 41.0, "epoch": 0.3484735666418466, "grad_norm": 1.3878828168584827, "kl": 0.37744140625, "learning_rate": 9.984595944843176e-07, "loss": 0.014605285599827766, "memory(GiB)": 109.69, "reward": 1.635416716337204, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.6354166865348816, "rewards/CineAccuracyORM/std": 0.49030808359384537, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 234, "train_speed(iter/s)": 0.029596 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 88.5, "completions/mean_length": 60.59375, "completions/min_length": 38.5, "epoch": 0.34996276991809383, "grad_norm": 1.279138908062873, "kl": 0.38427734375, "learning_rate": 9.984409891604735e-07, "loss": 0.009828494861721992, "memory(GiB)": 109.69, "reward": 1.7187500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500149011612, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 235, "train_speed(iter/s)": 0.029613 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 82.5, "completions/mean_length": 59.08333492279053, "completions/min_length": 38.0, "epoch": 0.351451973194341, "grad_norm": 1.1868634913593241, "kl": 0.404296875, "learning_rate": 9.984222723261343e-07, "loss": 8.107124449452385e-05, "memory(GiB)": 109.69, "reward": 1.604166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.604166679084301, "rewards/CineAccuracyORM/std": 0.4717390313744545, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 236, "train_speed(iter/s)": 0.02963 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 89.75, "completions/mean_length": 60.72916889190674, "completions/min_length": 40.75, "epoch": 0.35294117647058826, "grad_norm": 1.0062832643335051, "kl": 0.40576171875, "learning_rate": 9.984034439854874e-07, "loss": 0.005803159438073635, "memory(GiB)": 109.69, "reward": 1.4062500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.4062500149011612, "rewards/CineAccuracyORM/std": 0.47723397612571716, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 237, "train_speed(iter/s)": 0.029615 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.0, "completions/mean_length": 62.13541793823242, "completions/min_length": 39.5, "epoch": 0.35443037974683544, "grad_norm": 1.6644519873445867, "kl": 0.3984375, "learning_rate": 9.98384504142745e-07, "loss": -0.006929117254912853, "memory(GiB)": 109.69, "reward": 1.5937500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500149011612, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 238, "train_speed(iter/s)": 0.029596 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 91.75, "completions/mean_length": 63.69791793823242, "completions/min_length": 39.25, "epoch": 0.35591958302308263, "grad_norm": 0.007247001659241737, "kl": 0.3984375, "learning_rate": 9.98365452802144e-07, "loss": 0.0003985874936915934, "memory(GiB)": 109.69, "reward": 1.416666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.416666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 239, "train_speed(iter/s)": 0.029579 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 85.25, "completions/mean_length": 57.83333492279053, "completions/min_length": 35.5, "epoch": 0.3574087862993299, "grad_norm": 0.006704921546338591, "kl": 0.37939453125, "learning_rate": 9.98346289967947e-07, "loss": 0.0003790844639297575, "memory(GiB)": 109.69, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 240, "train_speed(iter/s)": 0.0296 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 94.5, "completions/mean_length": 61.802085876464844, "completions/min_length": 40.0, "epoch": 0.35889798957557706, "grad_norm": 1.793722839715905, "kl": 0.404296875, "learning_rate": 9.98327015644441e-07, "loss": -0.0035373205319046974, "memory(GiB)": 109.69, "reward": 1.5312500596046448, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.5312500149011612, "rewards/CineAccuracyORM/std": 0.49030808359384537, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 241, "train_speed(iter/s)": 0.029624 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 82.75, "completions/mean_length": 61.16666793823242, "completions/min_length": 43.25, "epoch": 0.3603871928518243, "grad_norm": 2.0544914996396018, "kl": 0.39208984375, "learning_rate": 9.98307629835938e-07, "loss": 0.0025567542761564255, "memory(GiB)": 109.69, "reward": 1.6145833730697632, "reward_std": 0.07400632463395596, "rewards/CineAccuracyORM/mean": 0.6145833432674408, "rewards/CineAccuracyORM/std": 0.4920940324664116, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 242, "train_speed(iter/s)": 0.029642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 88.5, "completions/mean_length": 59.73958492279053, "completions/min_length": 38.0, "epoch": 0.3618763961280715, "grad_norm": 1.3377421692690932, "kl": 0.412109375, "learning_rate": 9.982881325467746e-07, "loss": 0.004093638621270657, "memory(GiB)": 109.69, "reward": 1.4895833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.4895833507180214, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 243, "train_speed(iter/s)": 0.029622 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 87.25, "completions/mean_length": 59.20833492279053, "completions/min_length": 37.75, "epoch": 0.3633655994043187, "grad_norm": 2.3223670525503413, "kl": 0.42724609375, "learning_rate": 9.98268523781313e-07, "loss": 0.012610271573066711, "memory(GiB)": 109.69, "reward": 1.4583333432674408, "reward_std": 0.07715167850255966, "rewards/CineAccuracyORM/mean": 0.45833333395421505, "rewards/CineAccuracyORM/std": 0.18116392940282822, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 244, "train_speed(iter/s)": 0.029573 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 83.75, "completions/mean_length": 61.10416889190674, "completions/min_length": 41.75, "epoch": 0.3648548026805659, "grad_norm": 0.007438794044167888, "kl": 0.38232421875, "learning_rate": 9.982488035439401e-07, "loss": 0.00038193093496374786, "memory(GiB)": 109.69, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 245, "train_speed(iter/s)": 0.029574 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 91.0, "completions/mean_length": 61.187500953674316, "completions/min_length": 40.25, "epoch": 0.3663440059568131, "grad_norm": 0.006714241548954844, "kl": 0.3896484375, "learning_rate": 9.982289718390674e-07, "loss": 0.0003891048254445195, "memory(GiB)": 109.69, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 246, "train_speed(iter/s)": 0.029607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 88.75, "completions/mean_length": 61.135416984558105, "completions/min_length": 34.5, "epoch": 0.3678332092330603, "grad_norm": 2.9366558555724036, "kl": 0.39013671875, "learning_rate": 9.982090286711321e-07, "loss": 0.007607848383486271, "memory(GiB)": 109.69, "reward": 1.5625000298023224, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.5625000149011612, "rewards/CineAccuracyORM/std": 0.32092025876045227, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 247, "train_speed(iter/s)": 0.029657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 89.5, "completions/mean_length": 63.208335876464844, "completions/min_length": 38.25, "epoch": 0.36932241250930753, "grad_norm": 2.5394166995847463, "kl": 0.42626953125, "learning_rate": 9.981889740445957e-07, "loss": 0.006423532031476498, "memory(GiB)": 109.69, "reward": 1.6770833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 248, "train_speed(iter/s)": 0.029708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 87.0, "completions/mean_length": 60.38541793823242, "completions/min_length": 41.25, "epoch": 0.3708116157855547, "grad_norm": 3.090613702573629, "kl": 0.4130859375, "learning_rate": 9.981688079639445e-07, "loss": -0.0009454218670725822, "memory(GiB)": 109.69, "reward": 1.4375000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.4375000149011612, "rewards/CineAccuracyORM/std": 0.4870600998401642, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 249, "train_speed(iter/s)": 0.029688 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 101.5, "completions/mean_length": 62.51041889190674, "completions/min_length": 40.0, "epoch": 0.37230081906180196, "grad_norm": 1.5227824264086818, "kl": 0.400390625, "learning_rate": 9.9814853043369e-07, "loss": 0.010217880830168724, "memory(GiB)": 109.69, "reward": 1.760416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166865348816, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 250, "train_speed(iter/s)": 0.029639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 92.75, "completions/mean_length": 62.000000953674316, "completions/min_length": 42.75, "epoch": 0.37379002233804914, "grad_norm": 1.6091930518210342, "kl": 0.39599609375, "learning_rate": 9.981281414583693e-07, "loss": -0.0002935338416136801, "memory(GiB)": 109.69, "reward": 1.7291666865348816, "reward_std": 0.08625819534063339, "rewards/CineAccuracyORM/mean": 0.7291666716337204, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 251, "train_speed(iter/s)": 0.02956 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 97.0, "completions/mean_length": 63.86458492279053, "completions/min_length": 38.25, "epoch": 0.37527922561429633, "grad_norm": 1.2615678764085418, "kl": 0.39501953125, "learning_rate": 9.981076410425432e-07, "loss": -0.0025093541480600834, "memory(GiB)": 109.69, "reward": 1.479166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.4791666865348816, "rewards/CineAccuracyORM/std": 0.4870600998401642, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 252, "train_speed(iter/s)": 0.029576 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 83.5, "completions/mean_length": 59.697916984558105, "completions/min_length": 42.75, "epoch": 0.37676842889054357, "grad_norm": 0.0072341520636442416, "kl": 0.39794921875, "learning_rate": 9.980870291907979e-07, "loss": 0.00039772590389475226, "memory(GiB)": 109.69, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 253, "train_speed(iter/s)": 0.02956 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 94.0, "completions/mean_length": 62.94791889190674, "completions/min_length": 44.0, "epoch": 0.37825763216679076, "grad_norm": 0.007146698183184676, "kl": 0.3984375, "learning_rate": 9.980663059077452e-07, "loss": 0.00039809889858588576, "memory(GiB)": 109.69, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 254, "train_speed(iter/s)": 0.029587 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 86.0, "completions/mean_length": 60.81250190734863, "completions/min_length": 40.5, "epoch": 0.379746835443038, "grad_norm": 1.5302149254713215, "kl": 0.4296875, "learning_rate": 9.98045471198021e-07, "loss": -0.010323361493647099, "memory(GiB)": 109.69, "reward": 1.5625000298023224, "reward_std": 0.09750141017138958, "rewards/CineAccuracyORM/mean": 0.5625000074505806, "rewards/CineAccuracyORM/std": 0.3600961044430733, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 255, "train_speed(iter/s)": 0.029604 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 97.75, "completions/mean_length": 61.87500190734863, "completions/min_length": 36.5, "epoch": 0.3812360387192852, "grad_norm": 1.3497274260354588, "kl": 0.390625, "learning_rate": 9.980245250662864e-07, "loss": -0.006504558026790619, "memory(GiB)": 109.69, "reward": 1.729166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.729166679084301, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 256, "train_speed(iter/s)": 0.029585 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 88.75, "completions/mean_length": 60.1875, "completions/min_length": 37.0, "epoch": 0.3827252419955324, "grad_norm": 2.0054094829107676, "kl": 0.4130859375, "learning_rate": 9.980034675172273e-07, "loss": 0.0005853887996636331, "memory(GiB)": 109.69, "reward": 1.5312500298023224, "reward_std": 0.12028077617287636, "rewards/CineAccuracyORM/mean": 0.5312500074505806, "rewards/CineAccuracyORM/std": 0.38600777834653854, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 257, "train_speed(iter/s)": 0.029586 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.0, "completions/mean_length": 61.83333492279053, "completions/min_length": 38.75, "epoch": 0.3842144452717796, "grad_norm": 0.007666869025916167, "kl": 0.39208984375, "learning_rate": 9.979822985555551e-07, "loss": 0.00039179096347652376, "memory(GiB)": 109.69, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 258, "train_speed(iter/s)": 0.029569 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 94.75, "completions/mean_length": 61.66666793823242, "completions/min_length": 41.5, "epoch": 0.3857036485480268, "grad_norm": 1.870695589830846, "kl": 0.40234375, "learning_rate": 9.979610181860051e-07, "loss": -0.013990018516778946, "memory(GiB)": 109.69, "reward": 1.8645833730697632, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.8645833432674408, "rewards/CineAccuracyORM/std": 0.27542631328105927, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 259, "train_speed(iter/s)": 0.029559 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 84.75, "completions/mean_length": 62.44791793823242, "completions/min_length": 43.25, "epoch": 0.387192851824274, "grad_norm": 1.20913325341095, "kl": 0.3779296875, "learning_rate": 9.979396264133387e-07, "loss": 0.004468006081879139, "memory(GiB)": 109.69, "reward": 1.6041667461395264, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6041666865348816, "rewards/CineAccuracyORM/std": 0.4870600998401642, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 260, "train_speed(iter/s)": 0.029555 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 86.75, "completions/mean_length": 61.43750190734863, "completions/min_length": 40.0, "epoch": 0.38868205510052123, "grad_norm": 1.8690244446064455, "kl": 0.349609375, "learning_rate": 9.979181232423412e-07, "loss": 0.00147923082113266, "memory(GiB)": 109.69, "reward": 1.6145833730697632, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.6145833507180214, "rewards/CineAccuracyORM/std": 0.362364798784256, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 261, "train_speed(iter/s)": 0.029572 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 95.75, "completions/mean_length": 63.31250190734863, "completions/min_length": 39.75, "epoch": 0.3901712583767684, "grad_norm": 2.364400442198695, "kl": 0.35791015625, "learning_rate": 9.978965086778236e-07, "loss": 0.020178884267807007, "memory(GiB)": 109.69, "reward": 1.6979166865348816, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.697916679084301, "rewards/CineAccuracyORM/std": 0.3470437303185463, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 262, "train_speed(iter/s)": 0.029586 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 99.25, "completions/mean_length": 61.45833492279053, "completions/min_length": 41.75, "epoch": 0.39166046165301566, "grad_norm": 0.0071876992021725535, "kl": 0.3740234375, "learning_rate": 9.978747827246212e-07, "loss": 0.00037403771420940757, "memory(GiB)": 109.69, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 263, "train_speed(iter/s)": 0.029598 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 88.0, "completions/mean_length": 60.364585876464844, "completions/min_length": 39.75, "epoch": 0.39314966492926284, "grad_norm": 2.033739257496895, "kl": 0.3828125, "learning_rate": 9.978529453875948e-07, "loss": -0.018405158072710037, "memory(GiB)": 109.69, "reward": 1.6770833730697632, "reward_std": 0.15115800499916077, "rewards/CineAccuracyORM/mean": 0.6770833432674408, "rewards/CineAccuracyORM/std": 0.4749870151281357, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 264, "train_speed(iter/s)": 0.029645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 97.5, "completions/mean_length": 61.64583492279053, "completions/min_length": 36.5, "epoch": 0.39463886820551003, "grad_norm": 0.9556926560699347, "kl": 0.39306640625, "learning_rate": 9.978309966716294e-07, "loss": 0.0011708089150488377, "memory(GiB)": 109.69, "reward": 1.6562500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500149011612, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 265, "train_speed(iter/s)": 0.029631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 87.75, "completions/mean_length": 60.32291793823242, "completions/min_length": 40.25, "epoch": 0.39612807148175727, "grad_norm": 1.6796433905370638, "kl": 0.3837890625, "learning_rate": 9.978089365816355e-07, "loss": 0.0030285059474408627, "memory(GiB)": 109.69, "reward": 1.6250000298023224, "reward_std": 0.1360772456973791, "rewards/CineAccuracyORM/mean": 0.6250000074505806, "rewards/CineAccuracyORM/std": 0.3477324768900871, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 266, "train_speed(iter/s)": 0.02965 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 86.25, "completions/mean_length": 62.90625286102295, "completions/min_length": 43.0, "epoch": 0.39761727475800446, "grad_norm": 2.1519319635344845, "kl": 0.38623046875, "learning_rate": 9.977867651225485e-07, "loss": 0.007552006281912327, "memory(GiB)": 109.69, "reward": 1.7500000596046448, "reward_std": 0.08625819534063339, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.35134297609329224, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 267, "train_speed(iter/s)": 0.029662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 95.5, "completions/mean_length": 64.02083587646484, "completions/min_length": 41.25, "epoch": 0.3991064780342517, "grad_norm": 1.5414216733048909, "kl": 0.35888671875, "learning_rate": 9.977644822993284e-07, "loss": -0.00044432803406380117, "memory(GiB)": 109.69, "reward": 1.604166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6041666772216558, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 268, "train_speed(iter/s)": 0.029678 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 97.5, "completions/mean_length": 62.354166984558105, "completions/min_length": 40.25, "epoch": 0.4005956813104989, "grad_norm": 1.9173793954415714, "kl": 0.37060546875, "learning_rate": 9.977420881169606e-07, "loss": -0.006164237856864929, "memory(GiB)": 109.69, "reward": 1.7083333730697632, "reward_std": 0.10346910171210766, "rewards/CineAccuracyORM/mean": 0.7083333432674408, "rewards/CineAccuracyORM/std": 0.3712640330195427, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 269, "train_speed(iter/s)": 0.02966 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 94.5, "completions/mean_length": 64.48958492279053, "completions/min_length": 39.75, "epoch": 0.40208488458674607, "grad_norm": 0.8478291753088769, "kl": 0.3681640625, "learning_rate": 9.977195825804548e-07, "loss": -0.015195367857813835, "memory(GiB)": 109.69, "reward": 1.7083334028720856, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.708333358168602, "rewards/CineAccuracyORM/std": 0.45633094012737274, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 270, "train_speed(iter/s)": 0.029653 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 95.5, "completions/mean_length": 64.19791889190674, "completions/min_length": 43.0, "epoch": 0.4035740878629933, "grad_norm": 2.233519417036817, "kl": 0.357421875, "learning_rate": 9.976969656948458e-07, "loss": 0.0034467712976038456, "memory(GiB)": 109.69, "reward": 1.635416716337204, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.635416679084301, "rewards/CineAccuracyORM/std": 0.4550659582018852, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 271, "train_speed(iter/s)": 0.029665 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 96.75, "completions/mean_length": 64.83333683013916, "completions/min_length": 39.75, "epoch": 0.4050632911392405, "grad_norm": 1.3424926074089334, "kl": 0.365234375, "learning_rate": 9.976742374651935e-07, "loss": 0.002691563218832016, "memory(GiB)": 109.69, "reward": 1.729166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7291666865348816, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 272, "train_speed(iter/s)": 0.029646 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 96.75, "completions/mean_length": 68.35416984558105, "completions/min_length": 48.0, "epoch": 0.40655249441548774, "grad_norm": 1.1696610851234799, "kl": 0.34521484375, "learning_rate": 9.976513978965829e-07, "loss": 0.0022079318296164274, "memory(GiB)": 109.69, "reward": 1.6979166865348816, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.3546011596918106, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 273, "train_speed(iter/s)": 0.02963 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 93.75, "completions/mean_length": 65.09375095367432, "completions/min_length": 40.25, "epoch": 0.40804169769173493, "grad_norm": 1.5837566309518762, "kl": 0.35205078125, "learning_rate": 9.976284469941232e-07, "loss": -0.0033039450645446777, "memory(GiB)": 109.69, "reward": 1.697916716337204, "reward_std": 0.07400631904602051, "rewards/CineAccuracyORM/mean": 0.697916679084301, "rewards/CineAccuracyORM/std": 0.34448449313640594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 274, "train_speed(iter/s)": 0.02964 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 91.5, "completions/mean_length": 64.34375190734863, "completions/min_length": 46.25, "epoch": 0.4095309009679821, "grad_norm": 1.075611851391064, "kl": 0.353515625, "learning_rate": 9.976053847629495e-07, "loss": -0.007815254852175713, "memory(GiB)": 109.69, "reward": 1.5937500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500223517418, "rewards/CineAccuracyORM/std": 0.47723397612571716, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 275, "train_speed(iter/s)": 0.029625 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 101.25, "completions/mean_length": 66.28125190734863, "completions/min_length": 41.25, "epoch": 0.41102010424422936, "grad_norm": 1.5592384723578987, "kl": 0.369140625, "learning_rate": 9.975822112082208e-07, "loss": -0.003388018114492297, "memory(GiB)": 109.69, "reward": 1.6875000298023224, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.6875000074505806, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 276, "train_speed(iter/s)": 0.029589 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 96.25, "completions/mean_length": 65.2291669845581, "completions/min_length": 41.75, "epoch": 0.41250930752047654, "grad_norm": 1.0105601606857506, "kl": 0.36376953125, "learning_rate": 9.975589263351215e-07, "loss": 0.0004937461926601827, "memory(GiB)": 109.69, "reward": 1.822916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 277, "train_speed(iter/s)": 0.029568 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.75, "completions/mean_length": 68.25000190734863, "completions/min_length": 48.25, "epoch": 0.41399851079672373, "grad_norm": 1.1005559572886645, "kl": 0.33056640625, "learning_rate": 9.975355301488609e-07, "loss": 0.00105850154068321, "memory(GiB)": 109.69, "reward": 1.4270833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.4270833432674408, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 278, "train_speed(iter/s)": 0.029583 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 108.0, "completions/mean_length": 71.22916793823242, "completions/min_length": 45.75, "epoch": 0.41548771407297097, "grad_norm": 0.006894983235729774, "kl": 0.36279296875, "learning_rate": 9.975120226546732e-07, "loss": 0.00036213762359693646, "memory(GiB)": 109.69, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 279, "train_speed(iter/s)": 0.029574 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 96.0, "completions/mean_length": 66.59375381469727, "completions/min_length": 40.75, "epoch": 0.41697691734921816, "grad_norm": 1.5454928013214373, "kl": 0.34912109375, "learning_rate": 9.974884038578175e-07, "loss": 0.014599892310798168, "memory(GiB)": 109.69, "reward": 1.5520833730697632, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.5520833432674408, "rewards/CineAccuracyORM/std": 0.3470437303185463, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 280, "train_speed(iter/s)": 0.029583 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 106.75, "completions/mean_length": 69.31250381469727, "completions/min_length": 46.0, "epoch": 0.4184661206254654, "grad_norm": 2.1205345729406004, "kl": 0.349609375, "learning_rate": 9.974646737635778e-07, "loss": -0.023379143327474594, "memory(GiB)": 109.69, "reward": 1.8125, "reward_std": 0.1308017373085022, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.2966044694185257, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 281, "train_speed(iter/s)": 0.029597 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 96.0, "completions/mean_length": 69.60416984558105, "completions/min_length": 44.0, "epoch": 0.4199553239017126, "grad_norm": 1.6207222090307631, "kl": 0.361328125, "learning_rate": 9.97440832377263e-07, "loss": 0.007089504972100258, "memory(GiB)": 109.69, "reward": 1.4791666865348816, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.479166679084301, "rewards/CineAccuracyORM/std": 0.32092025876045227, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 282, "train_speed(iter/s)": 0.02961 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 99.0, "completions/mean_length": 69.39583396911621, "completions/min_length": 45.5, "epoch": 0.42144452717795977, "grad_norm": 0.006684138204532202, "kl": 0.3359375, "learning_rate": 9.974168797042066e-07, "loss": 0.000336061988491565, "memory(GiB)": 109.69, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 283, "train_speed(iter/s)": 0.029594 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 106.75, "completions/mean_length": 72.15625190734863, "completions/min_length": 48.0, "epoch": 0.422933730454207, "grad_norm": 1.017103719514461, "kl": 0.35302734375, "learning_rate": 9.973928157497674e-07, "loss": -0.0006853320519439876, "memory(GiB)": 109.69, "reward": 1.5208333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.5208333432674408, "rewards/CineAccuracyORM/std": 0.4717390313744545, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 284, "train_speed(iter/s)": 0.02959 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 106.0, "completions/mean_length": 72.15625381469727, "completions/min_length": 47.0, "epoch": 0.4244229337304542, "grad_norm": 0.006820982770004704, "kl": 0.33203125, "learning_rate": 9.97368640519329e-07, "loss": 0.0003323708369862288, "memory(GiB)": 109.69, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 285, "train_speed(iter/s)": 0.0296 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 95.25, "completions/mean_length": 70.77083778381348, "completions/min_length": 49.0, "epoch": 0.42591213700670144, "grad_norm": 0.006447788736564575, "kl": 0.33740234375, "learning_rate": 9.973443540182996e-07, "loss": 0.00033704942325130105, "memory(GiB)": 109.69, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 286, "train_speed(iter/s)": 0.029642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 112.5, "completions/mean_length": 76.64583587646484, "completions/min_length": 43.5, "epoch": 0.4274013402829486, "grad_norm": 1.4529947663254976, "kl": 0.32568359375, "learning_rate": 9.97319956252113e-07, "loss": -0.008492881432175636, "memory(GiB)": 109.69, "reward": 1.5937500596046448, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.5937500149011612, "rewards/CineAccuracyORM/std": 0.362364798784256, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 287, "train_speed(iter/s)": 0.02968 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 104.0, "completions/mean_length": 72.83333587646484, "completions/min_length": 45.25, "epoch": 0.4288905435591958, "grad_norm": 0.9373759220861358, "kl": 0.34912109375, "learning_rate": 9.97295447226227e-07, "loss": -0.007738334126770496, "memory(GiB)": 109.69, "reward": 1.5937500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500149011612, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 288, "train_speed(iter/s)": 0.029665 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 114.75, "completions/mean_length": 77.04166984558105, "completions/min_length": 51.5, "epoch": 0.43037974683544306, "grad_norm": 1.083228048910597, "kl": 0.302734375, "learning_rate": 9.97270826946125e-07, "loss": 0.0018197386525571346, "memory(GiB)": 109.69, "reward": 1.5312500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.5312500149011612, "rewards/CineAccuracyORM/std": 0.464870348572731, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 289, "train_speed(iter/s)": 0.029645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.75, "completions/mean_length": 74.90625190734863, "completions/min_length": 50.25, "epoch": 0.43186895011169024, "grad_norm": 0.006095619216840793, "kl": 0.3515625, "learning_rate": 9.97246095417315e-07, "loss": 0.00035153093631379306, "memory(GiB)": 109.69, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 290, "train_speed(iter/s)": 0.029628 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.75, "completions/mean_length": 77.14583396911621, "completions/min_length": 53.25, "epoch": 0.43335815338793743, "grad_norm": 0.005998345063233441, "kl": 0.33740234375, "learning_rate": 9.972212526453296e-07, "loss": 0.0003372747451066971, "memory(GiB)": 109.69, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 291, "train_speed(iter/s)": 0.029647 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.0, "completions/mean_length": 70.46875190734863, "completions/min_length": 44.0, "epoch": 0.43484735666418467, "grad_norm": 0.8040323082690393, "kl": 0.3173828125, "learning_rate": 9.971962986357269e-07, "loss": 0.001070383470505476, "memory(GiB)": 109.69, "reward": 1.8958333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 292, "train_speed(iter/s)": 0.029688 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 112.25, "completions/mean_length": 76.92708396911621, "completions/min_length": 49.75, "epoch": 0.43633655994043186, "grad_norm": 0.006185973858833744, "kl": 0.32470703125, "learning_rate": 9.971712333940894e-07, "loss": 0.0003250161244068295, "memory(GiB)": 109.69, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 293, "train_speed(iter/s)": 0.029699 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.25, "completions/mean_length": 77.95833396911621, "completions/min_length": 47.5, "epoch": 0.4378257632166791, "grad_norm": 1.3140367567368973, "kl": 0.32177734375, "learning_rate": 9.971460569260248e-07, "loss": -0.0066781300120055676, "memory(GiB)": 109.69, "reward": 1.6458333432674408, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.6458333358168602, "rewards/CineAccuracyORM/std": 0.3676535338163376, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 294, "train_speed(iter/s)": 0.029683 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.5, "completions/mean_length": 81.28125190734863, "completions/min_length": 56.0, "epoch": 0.4393149664929263, "grad_norm": 0.9661332434605253, "kl": 0.30615234375, "learning_rate": 9.971207692371656e-07, "loss": -0.0009799127001315355, "memory(GiB)": 109.69, "reward": 1.5000000298023224, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.5000000223517418, "rewards/CineAccuracyORM/std": 0.2874932996928692, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 295, "train_speed(iter/s)": 0.029663 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.75, "completions/mean_length": 83.38541793823242, "completions/min_length": 49.5, "epoch": 0.44080416976917347, "grad_norm": 1.1556813662850558, "kl": 0.2958984375, "learning_rate": 9.97095370333169e-07, "loss": -0.0008748803520575166, "memory(GiB)": 109.69, "reward": 1.7187500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500074505806, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 296, "train_speed(iter/s)": 0.029671 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.5, "completions/mean_length": 78.42708587646484, "completions/min_length": 49.75, "epoch": 0.4422933730454207, "grad_norm": 0.7946353500015853, "kl": 0.314453125, "learning_rate": 9.970698602197173e-07, "loss": -0.0010447558015584946, "memory(GiB)": 109.69, "reward": 1.7291666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7291666716337204, "rewards/CineAccuracyORM/std": 0.07058246433734894, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 297, "train_speed(iter/s)": 0.029651 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.0, "completions/mean_length": 84.85416984558105, "completions/min_length": 53.25, "epoch": 0.4437825763216679, "grad_norm": 0.9908201342824906, "kl": 0.2802734375, "learning_rate": 9.970442389025171e-07, "loss": -0.0007967769633978605, "memory(GiB)": 109.69, "reward": 1.5312500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.5312500149011612, "rewards/CineAccuracyORM/std": 0.4884019047021866, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 298, "train_speed(iter/s)": 0.029641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.0, "completions/mean_length": 81.10416984558105, "completions/min_length": 54.75, "epoch": 0.44527177959791514, "grad_norm": 1.4785811032522322, "kl": 0.28564453125, "learning_rate": 9.97018506387301e-07, "loss": 0.0023195254616439342, "memory(GiB)": 109.69, "reward": 1.385416716337204, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.385416679084301, "rewards/CineAccuracyORM/std": 0.45113223791122437, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 299, "train_speed(iter/s)": 0.029678 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.5, "completions/mean_length": 85.44791984558105, "completions/min_length": 59.25, "epoch": 0.4467609828741623, "grad_norm": 1.9151312416908046, "kl": 0.28271484375, "learning_rate": 9.969926626798257e-07, "loss": -0.0006890117074362934, "memory(GiB)": 109.69, "reward": 1.7187500596046448, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.7187500149011612, "rewards/CineAccuracyORM/std": 0.4550659582018852, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 300, "train_speed(iter/s)": 0.029715 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.5, "completions/mean_length": 87.43750190734863, "completions/min_length": 59.0, "epoch": 0.4482501861504095, "grad_norm": 0.6671659954912104, "kl": 0.284423828125, "learning_rate": 9.969667077858727e-07, "loss": -0.0015758241061121225, "memory(GiB)": 109.69, "reward": 1.5937500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500102445483, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 301, "train_speed(iter/s)": 0.029695 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.5, "completions/mean_length": 86.54166984558105, "completions/min_length": 57.25, "epoch": 0.44973938942665675, "grad_norm": 1.5814137119282097, "kl": 0.2919921875, "learning_rate": 9.969406417112488e-07, "loss": 0.004496965557336807, "memory(GiB)": 109.69, "reward": 1.5520833730697632, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.5520833507180214, "rewards/CineAccuracyORM/std": 0.49030808359384537, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 302, "train_speed(iter/s)": 0.029704 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.75, "completions/mean_length": 91.61458587646484, "completions/min_length": 58.75, "epoch": 0.45122859270290394, "grad_norm": 1.9161965335713427, "kl": 0.29052734375, "learning_rate": 9.969144644617853e-07, "loss": 0.00086929154349491, "memory(GiB)": 109.69, "reward": 1.4270834028720856, "reward_std": 0.0876726396381855, "rewards/CineAccuracyORM/mean": 0.4270833469927311, "rewards/CineAccuracyORM/std": 0.46318942308425903, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 303, "train_speed(iter/s)": 0.02968 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.25, "completions/mean_length": 87.98958587646484, "completions/min_length": 61.0, "epoch": 0.4527177959791511, "grad_norm": 1.6860534975754626, "kl": 0.28271484375, "learning_rate": 9.968881760433387e-07, "loss": 0.00808725319802761, "memory(GiB)": 109.69, "reward": 1.5729166865348816, "reward_std": 0.12696419283747673, "rewards/CineAccuracyORM/mean": 0.5729166716337204, "rewards/CineAccuracyORM/std": 0.49128737300634384, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 304, "train_speed(iter/s)": 0.029648 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.25, "completions/mean_length": 89.90625190734863, "completions/min_length": 61.25, "epoch": 0.45420699925539837, "grad_norm": 0.6730153662906556, "kl": 0.27734375, "learning_rate": 9.968617764617898e-07, "loss": 0.009814141318202019, "memory(GiB)": 109.69, "reward": 1.510416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.510416679084301, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 305, "train_speed(iter/s)": 0.029622 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.0, "completions/mean_length": 92.36458778381348, "completions/min_length": 60.5, "epoch": 0.45569620253164556, "grad_norm": 1.6109370490343986, "kl": 0.26806640625, "learning_rate": 9.968352657230454e-07, "loss": -0.009009933099150658, "memory(GiB)": 109.69, "reward": 1.5625000298023224, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.5625000149011612, "rewards/CineAccuracyORM/std": 0.4154365845024586, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 306, "train_speed(iter/s)": 0.02958 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.0, "completions/mean_length": 90.75000381469727, "completions/min_length": 60.25, "epoch": 0.4571854058078928, "grad_norm": 0.6792082128660607, "kl": 0.33544921875, "learning_rate": 9.968086438330358e-07, "loss": -0.0026779472827911377, "memory(GiB)": 109.69, "reward": 1.572916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.572916679084301, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 307, "train_speed(iter/s)": 0.029563 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.25, "completions/mean_length": 94.66666793823242, "completions/min_length": 61.5, "epoch": 0.45867460908414, "grad_norm": 1.1820417167589006, "kl": 0.253662109375, "learning_rate": 9.967819107977174e-07, "loss": 0.0039260657504200935, "memory(GiB)": 109.69, "reward": 1.3750000596046448, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.3750000111758709, "rewards/CineAccuracyORM/std": 0.33594508469104767, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 308, "train_speed(iter/s)": 0.029561 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.25, "completions/mean_length": 92.90625190734863, "completions/min_length": 56.5, "epoch": 0.46016381236038717, "grad_norm": 0.013139531987305159, "kl": 0.2783203125, "learning_rate": 9.967550666230702e-07, "loss": 0.0002786653640214354, "memory(GiB)": 109.69, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 309, "train_speed(iter/s)": 0.029537 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.25, "completions/mean_length": 92.09375190734863, "completions/min_length": 63.0, "epoch": 0.4616530156366344, "grad_norm": 1.2526037398412404, "kl": 0.2841796875, "learning_rate": 9.967281113151e-07, "loss": -0.004408514127135277, "memory(GiB)": 109.69, "reward": 1.6145833730697632, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.6145833507180214, "rewards/CineAccuracyORM/std": 0.3735327273607254, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 310, "train_speed(iter/s)": 0.029519 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.0, "completions/mean_length": 90.86458587646484, "completions/min_length": 55.25, "epoch": 0.4631422189128816, "grad_norm": 0.9477739551975156, "kl": 0.275634765625, "learning_rate": 9.967010448798374e-07, "loss": 0.0010402326006442308, "memory(GiB)": 109.69, "reward": 1.7083334028720856, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.708333358168602, "rewards/CineAccuracyORM/std": 0.45633094012737274, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 311, "train_speed(iter/s)": 0.029553 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.25, "completions/mean_length": 94.94791793823242, "completions/min_length": 60.75, "epoch": 0.46463142218912884, "grad_norm": 0.6228911717317689, "kl": 0.260498046875, "learning_rate": 9.966738673233378e-07, "loss": -0.0029212969820946455, "memory(GiB)": 109.69, "reward": 1.635416716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.635416679084301, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 312, "train_speed(iter/s)": 0.029533 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.5, "completions/mean_length": 98.03125381469727, "completions/min_length": 70.0, "epoch": 0.466120625465376, "grad_norm": 0.00467238295393964, "kl": 0.26708984375, "learning_rate": 9.966465786516806e-07, "loss": 0.0002669786335900426, "memory(GiB)": 109.69, "reward": 1.416666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.416666679084301, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 313, "train_speed(iter/s)": 0.029539 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.0, "completions/mean_length": 96.09375381469727, "completions/min_length": 70.5, "epoch": 0.4676098287416232, "grad_norm": 0.004520911837730449, "kl": 0.26220703125, "learning_rate": 9.966191788709714e-07, "loss": 0.00026177536346949637, "memory(GiB)": 109.69, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 314, "train_speed(iter/s)": 0.029572 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.5, "completions/mean_length": 93.64583778381348, "completions/min_length": 60.75, "epoch": 0.46909903201787045, "grad_norm": 0.005996126542248526, "kl": 0.26904296875, "learning_rate": 9.9659166798734e-07, "loss": 0.0002688353124540299, "memory(GiB)": 109.69, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 315, "train_speed(iter/s)": 0.029561 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.75, "completions/mean_length": 95.55208396911621, "completions/min_length": 66.75, "epoch": 0.47058823529411764, "grad_norm": 0.004061961236477731, "kl": 0.27783203125, "learning_rate": 9.96564046006941e-07, "loss": 0.00027777164359577, "memory(GiB)": 109.69, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 316, "train_speed(iter/s)": 0.029593 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.75, "completions/mean_length": 92.42708587646484, "completions/min_length": 61.25, "epoch": 0.4720774385703649, "grad_norm": 0.004278588941935126, "kl": 0.27294921875, "learning_rate": 9.965363129359537e-07, "loss": 0.00027265079552307725, "memory(GiB)": 109.69, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 317, "train_speed(iter/s)": 0.029601 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.75, "completions/mean_length": 95.00000190734863, "completions/min_length": 67.75, "epoch": 0.47356664184661207, "grad_norm": 0.004818861156836564, "kl": 0.269287109375, "learning_rate": 9.965084687805827e-07, "loss": 0.00026913851615972817, "memory(GiB)": 109.69, "reward": 1.2500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.2500000074505806, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 318, "train_speed(iter/s)": 0.029634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.75, "completions/mean_length": 95.27083587646484, "completions/min_length": 60.5, "epoch": 0.47505584512285925, "grad_norm": 0.004480478368580182, "kl": 0.2724609375, "learning_rate": 9.964805135470575e-07, "loss": 0.0002725249214563519, "memory(GiB)": 109.69, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 319, "train_speed(iter/s)": 0.029613 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.25, "completions/mean_length": 90.87500190734863, "completions/min_length": 59.0, "epoch": 0.4765450483991065, "grad_norm": 1.8236399834263208, "kl": 0.294921875, "learning_rate": 9.964524472416317e-07, "loss": 0.0007044151425361633, "memory(GiB)": 109.69, "reward": 1.8541666865348816, "reward_std": 0.0725918859243393, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 320, "train_speed(iter/s)": 0.029597 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.0, "completions/mean_length": 94.19791984558105, "completions/min_length": 58.75, "epoch": 0.4780342516753537, "grad_norm": 0.004704654115788009, "kl": 0.27978515625, "learning_rate": 9.964242698705849e-07, "loss": 0.00027958405553363264, "memory(GiB)": 109.69, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 321, "train_speed(iter/s)": 0.029613 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.5, "completions/mean_length": 95.54166984558105, "completions/min_length": 60.0, "epoch": 0.47952345495160087, "grad_norm": 0.7258443808846547, "kl": 0.26904296875, "learning_rate": 9.963959814402202e-07, "loss": 0.005590090062469244, "memory(GiB)": 109.69, "reward": 1.7083334028720856, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.708333358168602, "rewards/CineAccuracyORM/std": 0.45633094012737274, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 322, "train_speed(iter/s)": 0.029592 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.75, "completions/mean_length": 91.65625190734863, "completions/min_length": 57.5, "epoch": 0.4810126582278481, "grad_norm": 1.5684413264577526, "kl": 0.26806640625, "learning_rate": 9.963675819568668e-07, "loss": 0.005329744424670935, "memory(GiB)": 109.69, "reward": 1.6250000596046448, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.6250000111758709, "rewards/CineAccuracyORM/std": 0.33594508469104767, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 323, "train_speed(iter/s)": 0.029574 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.25, "completions/mean_length": 96.30208587646484, "completions/min_length": 60.75, "epoch": 0.4825018615040953, "grad_norm": 1.9720011904173729, "kl": 0.26953125, "learning_rate": 9.96339071426878e-07, "loss": -0.0075444504618644714, "memory(GiB)": 109.69, "reward": 1.510416716337204, "reward_std": 0.0876726359128952, "rewards/CineAccuracyORM/mean": 0.5104166865348816, "rewards/CineAccuracyORM/std": 0.47723397612571716, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 324, "train_speed(iter/s)": 0.029534 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.0, "completions/mean_length": 97.98958587646484, "completions/min_length": 67.0, "epoch": 0.48399106478034254, "grad_norm": 0.004991669996346375, "kl": 0.26171875, "learning_rate": 9.96310449856632e-07, "loss": 0.0002621935273054987, "memory(GiB)": 109.69, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 325, "train_speed(iter/s)": 0.029516 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.25, "completions/mean_length": 91.41666984558105, "completions/min_length": 55.5, "epoch": 0.4854802680565897, "grad_norm": 1.40029323606245, "kl": 0.264404296875, "learning_rate": 9.962817172525322e-07, "loss": 0.006259795278310776, "memory(GiB)": 109.69, "reward": 1.7395833730697632, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.31460215896368027, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 326, "train_speed(iter/s)": 0.029548 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.0, "completions/mean_length": 89.64583587646484, "completions/min_length": 60.75, "epoch": 0.4869694713328369, "grad_norm": 1.148698870824261, "kl": 0.28564453125, "learning_rate": 9.962528736210064e-07, "loss": 0.0055189891718328, "memory(GiB)": 109.69, "reward": 1.6145833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6145833432674408, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 327, "train_speed(iter/s)": 0.029579 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.75, "completions/mean_length": 94.56250381469727, "completions/min_length": 62.75, "epoch": 0.48845867460908415, "grad_norm": 0.004320254494984607, "kl": 0.283203125, "learning_rate": 9.962239189685075e-07, "loss": 0.00028304418083280325, "memory(GiB)": 109.69, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 328, "train_speed(iter/s)": 0.02956 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.25, "completions/mean_length": 93.71875190734863, "completions/min_length": 54.25, "epoch": 0.48994787788533134, "grad_norm": 1.7627317026800535, "kl": 0.26806640625, "learning_rate": 9.961948533015134e-07, "loss": -0.0019875429570674896, "memory(GiB)": 109.69, "reward": 1.6458334028720856, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6458333507180214, "rewards/CineAccuracyORM/std": 0.43174003064632416, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 329, "train_speed(iter/s)": 0.02951 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.5, "completions/mean_length": 91.57291793823242, "completions/min_length": 63.25, "epoch": 0.4914370811615786, "grad_norm": 0.005953294814039742, "kl": 0.27783203125, "learning_rate": 9.961656766265262e-07, "loss": 0.000277314567938447, "memory(GiB)": 109.69, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 330, "train_speed(iter/s)": 0.029502 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.5, "completions/mean_length": 94.46875381469727, "completions/min_length": 58.75, "epoch": 0.49292628443782577, "grad_norm": 0.9170860241280995, "kl": 0.2744140625, "learning_rate": 9.961363889500738e-07, "loss": -0.004294336307793856, "memory(GiB)": 109.69, "reward": 1.479166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.479166679084301, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 331, "train_speed(iter/s)": 0.029483 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.5, "completions/mean_length": 92.02083587646484, "completions/min_length": 59.5, "epoch": 0.49441548771407295, "grad_norm": 0.7670555521328041, "kl": 0.28271484375, "learning_rate": 9.96106990278708e-07, "loss": 0.0011542674619704485, "memory(GiB)": 109.69, "reward": 1.6770833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833386108279, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 332, "train_speed(iter/s)": 0.029489 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.25, "completions/mean_length": 92.85416793823242, "completions/min_length": 62.75, "epoch": 0.4959046909903202, "grad_norm": 1.2097888480361432, "kl": 0.2802734375, "learning_rate": 9.960774806190063e-07, "loss": -0.00015340428217314184, "memory(GiB)": 109.69, "reward": 1.8958333432674408, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.12590253353118896, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 333, "train_speed(iter/s)": 0.029521 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.75, "completions/mean_length": 92.11458396911621, "completions/min_length": 58.5, "epoch": 0.4973938942665674, "grad_norm": 0.01143228563322524, "kl": 0.26123046875, "learning_rate": 9.960478599775698e-07, "loss": 0.00026164224254898727, "memory(GiB)": 109.69, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 334, "train_speed(iter/s)": 0.029527 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.0, "completions/mean_length": 93.34375190734863, "completions/min_length": 62.75, "epoch": 0.49888309754281457, "grad_norm": 0.007030671212062643, "kl": 0.27197265625, "learning_rate": 9.960181283610258e-07, "loss": 0.00027134534320794046, "memory(GiB)": 109.69, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 335, "train_speed(iter/s)": 0.029559 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.75, "completions/mean_length": 89.91666793823242, "completions/min_length": 59.75, "epoch": 0.5003723008190618, "grad_norm": 2.0783342221400374, "kl": 0.27197265625, "learning_rate": 9.959882857760256e-07, "loss": 0.0024394220672547817, "memory(GiB)": 109.69, "reward": 1.3125000596046448, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.3125000102445483, "rewards/CineAccuracyORM/std": 0.4190470837056637, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 336, "train_speed(iter/s)": 0.029544 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.0, "completions/mean_length": 93.05208587646484, "completions/min_length": 57.25, "epoch": 0.501861504095309, "grad_norm": 0.0047471150510396, "kl": 0.27392578125, "learning_rate": 9.959583322292456e-07, "loss": 0.00027357693761587143, "memory(GiB)": 109.69, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 337, "train_speed(iter/s)": 0.029553 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.0, "completions/mean_length": 92.67708778381348, "completions/min_length": 62.5, "epoch": 0.5033507073715562, "grad_norm": 0.004548240767560126, "kl": 0.2763671875, "learning_rate": 9.959282677273868e-07, "loss": 0.0002759082126431167, "memory(GiB)": 109.69, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 338, "train_speed(iter/s)": 0.029537 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.25, "completions/mean_length": 93.19791793823242, "completions/min_length": 60.25, "epoch": 0.5048399106478034, "grad_norm": 0.9054429336762384, "kl": 0.261474609375, "learning_rate": 9.958980922771754e-07, "loss": -0.002090791007503867, "memory(GiB)": 109.69, "reward": 1.7083333730697632, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7083333432674408, "rewards/CineAccuracyORM/std": 0.36846019327640533, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 339, "train_speed(iter/s)": 0.029568 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.0, "completions/mean_length": 91.81250190734863, "completions/min_length": 56.5, "epoch": 0.5063291139240507, "grad_norm": 0.00531842723731678, "kl": 0.263427734375, "learning_rate": 9.958678058853623e-07, "loss": 0.000263282738160342, "memory(GiB)": 109.69, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 340, "train_speed(iter/s)": 0.029532 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.5, "completions/mean_length": 93.16666984558105, "completions/min_length": 63.0, "epoch": 0.5078183172002978, "grad_norm": 0.005803525531737974, "kl": 0.26025390625, "learning_rate": 9.958374085587226e-07, "loss": 0.000260138331213966, "memory(GiB)": 109.69, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 341, "train_speed(iter/s)": 0.029539 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.5, "completions/mean_length": 88.70833587646484, "completions/min_length": 57.75, "epoch": 0.509307520476545, "grad_norm": 0.005084475790252795, "kl": 0.26513671875, "learning_rate": 9.958069003040574e-07, "loss": 0.00026569183683022857, "memory(GiB)": 109.69, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 342, "train_speed(iter/s)": 0.029571 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.5, "completions/mean_length": 94.12500190734863, "completions/min_length": 60.75, "epoch": 0.5107967237527923, "grad_norm": 1.0135518000476218, "kl": 0.27001953125, "learning_rate": 9.957762811281916e-07, "loss": 0.0017002805834636092, "memory(GiB)": 109.69, "reward": 1.6770833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833358168602, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 343, "train_speed(iter/s)": 0.029601 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.25, "completions/mean_length": 85.23958587646484, "completions/min_length": 58.75, "epoch": 0.5122859270290394, "grad_norm": 0.00442047220050333, "kl": 0.29052734375, "learning_rate": 9.95745551037975e-07, "loss": 0.0002908675523940474, "memory(GiB)": 109.69, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 344, "train_speed(iter/s)": 0.02958 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.25, "completions/mean_length": 91.76041984558105, "completions/min_length": 64.5, "epoch": 0.5137751303052867, "grad_norm": 0.0056956231949417315, "kl": 0.28759765625, "learning_rate": 9.957147100402832e-07, "loss": 0.00028807282797060907, "memory(GiB)": 109.69, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 345, "train_speed(iter/s)": 0.029544 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.25, "completions/mean_length": 89.14583587646484, "completions/min_length": 57.5, "epoch": 0.5152643335815339, "grad_norm": 1.6082746722594805, "kl": 0.259765625, "learning_rate": 9.956837581420155e-07, "loss": 0.005911373998969793, "memory(GiB)": 109.69, "reward": 1.9062500298023224, "reward_std": 0.0876726396381855, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.19888615608215332, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 346, "train_speed(iter/s)": 0.029524 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.25, "completions/mean_length": 92.19791793823242, "completions/min_length": 61.75, "epoch": 0.5167535368577811, "grad_norm": 2.3086534898355806, "kl": 0.275634765625, "learning_rate": 9.956526953500964e-07, "loss": 0.008114833384752274, "memory(GiB)": 109.69, "reward": 1.6250000596046448, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.6250000223517418, "rewards/CineAccuracyORM/std": 0.4190470837056637, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 347, "train_speed(iter/s)": 0.029554 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.5, "completions/mean_length": 91.76041984558105, "completions/min_length": 65.75, "epoch": 0.5182427401340283, "grad_norm": 1.4150480189784298, "kl": 0.2890625, "learning_rate": 9.95621521671475e-07, "loss": 0.01228850893676281, "memory(GiB)": 109.69, "reward": 1.8958333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 348, "train_speed(iter/s)": 0.029536 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.0, "completions/mean_length": 88.28125190734863, "completions/min_length": 57.5, "epoch": 0.5197319434102755, "grad_norm": 0.004939981263060526, "kl": 0.255615234375, "learning_rate": 9.955902371131262e-07, "loss": 0.0002556056424509734, "memory(GiB)": 109.69, "reward": 1.416666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.416666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 349, "train_speed(iter/s)": 0.029544 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.75, "completions/mean_length": 92.65625190734863, "completions/min_length": 64.0, "epoch": 0.5212211466865228, "grad_norm": 0.004858050696169452, "kl": 0.28857421875, "learning_rate": 9.955588416820482e-07, "loss": 0.00028828575159423053, "memory(GiB)": 109.69, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 350, "train_speed(iter/s)": 0.029574 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.5, "completions/mean_length": 88.20833587646484, "completions/min_length": 62.0, "epoch": 0.5227103499627699, "grad_norm": 0.0050420489252047205, "kl": 0.28662109375, "learning_rate": 9.955273353852649e-07, "loss": 0.0002869927557185292, "memory(GiB)": 109.69, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 351, "train_speed(iter/s)": 0.029541 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.0, "completions/mean_length": 89.89583587646484, "completions/min_length": 61.0, "epoch": 0.5241995532390171, "grad_norm": 2.1664689114733116, "kl": 0.26416015625, "learning_rate": 9.95495718229825e-07, "loss": -0.0066675832495093346, "memory(GiB)": 109.69, "reward": 1.479166716337204, "reward_std": 0.08625819534063339, "rewards/CineAccuracyORM/mean": 0.4791666865348816, "rewards/CineAccuracyORM/std": 0.4870600998401642, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 352, "train_speed(iter/s)": 0.029497 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.0, "completions/mean_length": 91.44791984558105, "completions/min_length": 60.0, "epoch": 0.5256887565152644, "grad_norm": 1.724316343054604, "kl": 0.267822265625, "learning_rate": 9.954639902228017e-07, "loss": 0.0112705547362566, "memory(GiB)": 109.69, "reward": 1.4687500596046448, "reward_std": 0.08838834799826145, "rewards/CineAccuracyORM/mean": 0.4791666865348816, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 0.9895833432674408, "rewards/Format/std": 0.05103103443980217, "step": 353, "train_speed(iter/s)": 0.029479 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.5, "completions/mean_length": 91.42708778381348, "completions/min_length": 55.25, "epoch": 0.5271779597915115, "grad_norm": 0.005118179287188166, "kl": 0.274169921875, "learning_rate": 9.954321513712934e-07, "loss": 0.0002741580829024315, "memory(GiB)": 109.69, "reward": 1.5833333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 354, "train_speed(iter/s)": 0.029456 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 116.75, "completions/mean_length": 85.23958396911621, "completions/min_length": 60.0, "epoch": 0.5286671630677587, "grad_norm": 1.9795525821797142, "kl": 0.28955078125, "learning_rate": 9.954002016824225e-07, "loss": 0.006158304400742054, "memory(GiB)": 109.69, "reward": 1.604166716337204, "reward_std": 0.11116771958768368, "rewards/CineAccuracyORM/mean": 0.604166679084301, "rewards/CineAccuracyORM/std": 0.44184649735689163, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 355, "train_speed(iter/s)": 0.029465 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.5, "completions/mean_length": 83.04166984558105, "completions/min_length": 54.25, "epoch": 0.530156366344006, "grad_norm": 0.005604165133403527, "kl": 0.28857421875, "learning_rate": 9.953681411633374e-07, "loss": 0.00028863101033493876, "memory(GiB)": 109.69, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 356, "train_speed(iter/s)": 0.029487 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.5, "completions/mean_length": 86.46875381469727, "completions/min_length": 56.0, "epoch": 0.5316455696202531, "grad_norm": 1.9706427334690226, "kl": 0.287109375, "learning_rate": 9.953359698212103e-07, "loss": 0.00035566900623962283, "memory(GiB)": 109.69, "reward": 1.5937500298023224, "reward_std": 0.11572097800672054, "rewards/CineAccuracyORM/mean": 0.5937500074505806, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 357, "train_speed(iter/s)": 0.029493 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.5, "completions/mean_length": 83.43750381469727, "completions/min_length": 54.0, "epoch": 0.5331347728965004, "grad_norm": 0.9722068046285434, "kl": 0.28759765625, "learning_rate": 9.953036876632384e-07, "loss": 0.009208189323544502, "memory(GiB)": 109.69, "reward": 1.729166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.729166679084301, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 358, "train_speed(iter/s)": 0.029438 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.75, "completions/mean_length": 84.39583587646484, "completions/min_length": 59.25, "epoch": 0.5346239761727476, "grad_norm": 2.0794280008622823, "kl": 0.30810546875, "learning_rate": 9.95271294696644e-07, "loss": -0.025121908634901047, "memory(GiB)": 109.69, "reward": 1.90625, "reward_std": 0.10661446675658226, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.19503945857286453, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 359, "train_speed(iter/s)": 0.029467 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.0, "completions/mean_length": 85.87500381469727, "completions/min_length": 56.25, "epoch": 0.5361131794489948, "grad_norm": 0.005436791087416862, "kl": 0.29736328125, "learning_rate": 9.95238790928674e-07, "loss": 0.0002971945214085281, "memory(GiB)": 109.69, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 360, "train_speed(iter/s)": 0.029456 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 116.25, "completions/mean_length": 84.82291984558105, "completions/min_length": 59.0, "epoch": 0.537602382725242, "grad_norm": 1.7134697151739549, "kl": 0.3056640625, "learning_rate": 9.952061763666e-07, "loss": 0.002625805791467428, "memory(GiB)": 109.69, "reward": 1.7083333730697632, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7083333432674408, "rewards/CineAccuracyORM/std": 0.36846019327640533, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 361, "train_speed(iter/s)": 0.029443 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.0, "completions/mean_length": 85.82291984558105, "completions/min_length": 53.5, "epoch": 0.5390915860014892, "grad_norm": 1.4454471102637243, "kl": 0.28125, "learning_rate": 9.951734510177186e-07, "loss": -0.00011921868281206116, "memory(GiB)": 109.69, "reward": 1.8750000298023224, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.8750000149011612, "rewards/CineAccuracyORM/std": 0.2155592292547226, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 362, "train_speed(iter/s)": 0.029471 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.75, "completions/mean_length": 83.12500381469727, "completions/min_length": 54.5, "epoch": 0.5405807892777365, "grad_norm": 1.0388562314216303, "kl": 0.2861328125, "learning_rate": 9.951406148893509e-07, "loss": -0.007520279847085476, "memory(GiB)": 109.69, "reward": 1.7187500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500149011612, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 363, "train_speed(iter/s)": 0.029404 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.5, "completions/mean_length": 84.83333587646484, "completions/min_length": 60.0, "epoch": 0.5420699925539836, "grad_norm": 1.3706245687595169, "kl": 0.30615234375, "learning_rate": 9.951076679888432e-07, "loss": 8.40427674120292e-05, "memory(GiB)": 109.69, "reward": 1.5520833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.5520833432674408, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 364, "train_speed(iter/s)": 0.029434 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.0, "completions/mean_length": 82.41666793823242, "completions/min_length": 57.75, "epoch": 0.5435591958302308, "grad_norm": 1.2546749448674992, "kl": 0.29345703125, "learning_rate": 9.950746103235662e-07, "loss": -0.004722751211374998, "memory(GiB)": 109.69, "reward": 1.6562500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500149011612, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 365, "train_speed(iter/s)": 0.029419 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.75, "completions/mean_length": 86.81250190734863, "completions/min_length": 58.25, "epoch": 0.5450483991064781, "grad_norm": 1.9249242989661652, "kl": 0.2998046875, "learning_rate": 9.950414419009153e-07, "loss": -0.001959745306521654, "memory(GiB)": 109.69, "reward": 1.6145834028720856, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6145833507180214, "rewards/CineAccuracyORM/std": 0.4884019047021866, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 366, "train_speed(iter/s)": 0.029448 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.5, "completions/mean_length": 80.19791984558105, "completions/min_length": 54.0, "epoch": 0.5465376023827252, "grad_norm": 0.9174506152684098, "kl": 0.31298828125, "learning_rate": 9.950081627283115e-07, "loss": -0.005577364470809698, "memory(GiB)": 109.69, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833507180214, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 367, "train_speed(iter/s)": 0.029448 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.75, "completions/mean_length": 88.51041984558105, "completions/min_length": 61.5, "epoch": 0.5480268056589724, "grad_norm": 1.763531715532896, "kl": 0.283203125, "learning_rate": 9.949747728131992e-07, "loss": -0.009891970083117485, "memory(GiB)": 109.69, "reward": 1.8333333730697632, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.833333358168602, "rewards/CineAccuracyORM/std": 0.2874932996928692, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 368, "train_speed(iter/s)": 0.029458 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.0, "completions/mean_length": 89.55208587646484, "completions/min_length": 58.75, "epoch": 0.5495160089352197, "grad_norm": 1.3366861318434793, "kl": 0.2822265625, "learning_rate": 9.94941272163049e-07, "loss": 0.00013069497072137892, "memory(GiB)": 109.69, "reward": 1.7708333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 369, "train_speed(iter/s)": 0.029448 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.75, "completions/mean_length": 85.80208587646484, "completions/min_length": 56.5, "epoch": 0.5510052122114668, "grad_norm": 0.005263273524771884, "kl": 0.2841796875, "learning_rate": 9.949076607853552e-07, "loss": 0.0002838578075170517, "memory(GiB)": 109.69, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 370, "train_speed(iter/s)": 0.029452 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.75, "completions/mean_length": 85.64583778381348, "completions/min_length": 51.5, "epoch": 0.552494415487714, "grad_norm": 1.378355675363711, "kl": 0.27978515625, "learning_rate": 9.948739386876376e-07, "loss": -0.0029936775099486113, "memory(GiB)": 109.69, "reward": 1.5729166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5729166716337204, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 371, "train_speed(iter/s)": 0.029424 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.75, "completions/mean_length": 88.35416984558105, "completions/min_length": 60.25, "epoch": 0.5539836187639613, "grad_norm": 0.6414655523533747, "kl": 0.2890625, "learning_rate": 9.9484010587744e-07, "loss": 0.0016320878639817238, "memory(GiB)": 109.69, "reward": 1.5729166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5729166716337204, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 372, "train_speed(iter/s)": 0.029385 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.75, "completions/mean_length": 86.75000190734863, "completions/min_length": 60.5, "epoch": 0.5554728220402085, "grad_norm": 2.0190984891735986, "kl": 0.31005859375, "learning_rate": 9.948061623623318e-07, "loss": 0.0004856909508816898, "memory(GiB)": 109.69, "reward": 1.6562500298023224, "reward_std": 0.11258216388523579, "rewards/CineAccuracyORM/mean": 0.6562500149011612, "rewards/CineAccuracyORM/std": 0.34470974653959274, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 373, "train_speed(iter/s)": 0.029317 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.0, "completions/mean_length": 82.67708587646484, "completions/min_length": 52.0, "epoch": 0.5569620253164557, "grad_norm": 1.463465629375419, "kl": 0.29296875, "learning_rate": 9.947721081499067e-07, "loss": -0.0012823132565245032, "memory(GiB)": 109.69, "reward": 1.729166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.729166679084301, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 374, "train_speed(iter/s)": 0.02931 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.75, "completions/mean_length": 87.10416984558105, "completions/min_length": 56.25, "epoch": 0.5584512285927029, "grad_norm": 0.9847016112845953, "kl": 0.28564453125, "learning_rate": 9.947379432477833e-07, "loss": 0.007329202722758055, "memory(GiB)": 109.69, "reward": 1.8437500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 375, "train_speed(iter/s)": 0.029324 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.75, "completions/mean_length": 80.15625381469727, "completions/min_length": 55.5, "epoch": 0.5599404318689502, "grad_norm": 0.9916078029659973, "kl": 0.30078125, "learning_rate": 9.947036676636048e-07, "loss": 0.0037410797085613012, "memory(GiB)": 109.69, "reward": 1.6250000298023224, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.6250000074505806, "rewards/CineAccuracyORM/std": 0.36846019327640533, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 376, "train_speed(iter/s)": 0.029354 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.5, "completions/mean_length": 83.13541984558105, "completions/min_length": 56.25, "epoch": 0.5614296351451973, "grad_norm": 1.0044219544054, "kl": 0.2841796875, "learning_rate": 9.946692814050395e-07, "loss": -0.0025890145916491747, "memory(GiB)": 109.69, "reward": 1.5729166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5729166716337204, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 377, "train_speed(iter/s)": 0.029361 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.75, "completions/mean_length": 83.48958587646484, "completions/min_length": 56.25, "epoch": 0.5629188384214445, "grad_norm": 1.3647430903829831, "kl": 0.30810546875, "learning_rate": 9.9463478447978e-07, "loss": 0.0020818645134568214, "memory(GiB)": 109.69, "reward": 1.7916666865348816, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.791666679084301, "rewards/CineAccuracyORM/std": 0.2155592292547226, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 378, "train_speed(iter/s)": 0.029391 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.25, "completions/mean_length": 80.12500381469727, "completions/min_length": 53.0, "epoch": 0.5644080416976918, "grad_norm": 0.004744909564896166, "kl": 0.33447265625, "learning_rate": 9.94600176895544e-07, "loss": 0.00033414358040317893, "memory(GiB)": 109.69, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 379, "train_speed(iter/s)": 0.029356 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 105.0, "completions/mean_length": 78.85416793823242, "completions/min_length": 61.75, "epoch": 0.5658972449739389, "grad_norm": 1.8613158433830728, "kl": 0.31982421875, "learning_rate": 9.94565458660074e-07, "loss": 0.008225228637456894, "memory(GiB)": 109.69, "reward": 1.604166716337204, "reward_std": 0.12483403459191322, "rewards/CineAccuracyORM/mean": 0.604166679084301, "rewards/CineAccuracyORM/std": 0.4344591647386551, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 380, "train_speed(iter/s)": 0.029325 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.5, "completions/mean_length": 82.20833396911621, "completions/min_length": 57.0, "epoch": 0.5673864482501861, "grad_norm": 0.0056156085797379055, "kl": 0.31103515625, "learning_rate": 9.945306297811368e-07, "loss": 0.00031046586809679866, "memory(GiB)": 109.69, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 381, "train_speed(iter/s)": 0.029286 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.5, "completions/mean_length": 79.60416793823242, "completions/min_length": 53.5, "epoch": 0.5688756515264334, "grad_norm": 0.005243775275862437, "kl": 0.3203125, "learning_rate": 9.944956902665244e-07, "loss": 0.00032077287323772907, "memory(GiB)": 109.69, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 382, "train_speed(iter/s)": 0.029293 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.25, "completions/mean_length": 78.13541793823242, "completions/min_length": 52.25, "epoch": 0.5703648548026806, "grad_norm": 0.005439549147337028, "kl": 0.31884765625, "learning_rate": 9.944606401240538e-07, "loss": 0.0003184173256158829, "memory(GiB)": 109.69, "reward": 1.4166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.4166666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 383, "train_speed(iter/s)": 0.029283 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 104.25, "completions/mean_length": 76.12500190734863, "completions/min_length": 49.75, "epoch": 0.5718540580789278, "grad_norm": 0.9209272333984663, "kl": 0.3251953125, "learning_rate": 9.94425479361566e-07, "loss": 0.00023231003433465958, "memory(GiB)": 109.69, "reward": 1.5937500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500074505806, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 384, "train_speed(iter/s)": 0.029243 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.75, "completions/mean_length": 76.37500190734863, "completions/min_length": 52.75, "epoch": 0.573343261355175, "grad_norm": 1.438720539774675, "kl": 0.30859375, "learning_rate": 9.94390207986927e-07, "loss": -0.0017871439922600985, "memory(GiB)": 109.69, "reward": 1.6458333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6458333507180214, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 385, "train_speed(iter/s)": 0.029272 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.75, "completions/mean_length": 73.47916984558105, "completions/min_length": 40.75, "epoch": 0.5748324646314222, "grad_norm": 1.6704742041728124, "kl": 0.32373046875, "learning_rate": 9.943548260080277e-07, "loss": 0.0025156927295029163, "memory(GiB)": 109.69, "reward": 1.447916716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.447916679084301, "rewards/CineAccuracyORM/std": 0.4884019047021866, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 386, "train_speed(iter/s)": 0.029263 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.5, "completions/mean_length": 77.19791793823242, "completions/min_length": 49.0, "epoch": 0.5763216679076694, "grad_norm": 2.7989362670107694, "kl": 0.32470703125, "learning_rate": 9.943193334327837e-07, "loss": 0.002080701757222414, "memory(GiB)": 109.69, "reward": 1.7187500298023224, "reward_std": 0.07400632463395596, "rewards/CineAccuracyORM/mean": 0.7187500149011612, "rewards/CineAccuracyORM/std": 0.33919306844472885, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 387, "train_speed(iter/s)": 0.029257 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 113.75, "completions/mean_length": 73.10416793823242, "completions/min_length": 49.0, "epoch": 0.5778108711839166, "grad_norm": 0.005128275362123492, "kl": 0.32275390625, "learning_rate": 9.942837302691358e-07, "loss": 0.00032293135882355273, "memory(GiB)": 109.69, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 388, "train_speed(iter/s)": 0.029285 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.0, "completions/mean_length": 72.94791984558105, "completions/min_length": 43.0, "epoch": 0.5793000744601638, "grad_norm": 1.7594626245701472, "kl": 0.34228515625, "learning_rate": 9.942480165250487e-07, "loss": 0.0031305020675063133, "memory(GiB)": 109.69, "reward": 1.6250000596046448, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.6250000223517418, "rewards/CineAccuracyORM/std": 0.45633094012737274, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 389, "train_speed(iter/s)": 0.029258 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 113.75, "completions/mean_length": 74.33333587646484, "completions/min_length": 47.75, "epoch": 0.580789277736411, "grad_norm": 0.005042541659770717, "kl": 0.31591796875, "learning_rate": 9.942121922085121e-07, "loss": 0.0003157412284053862, "memory(GiB)": 109.69, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 390, "train_speed(iter/s)": 0.029288 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 105.0, "completions/mean_length": 74.14583587646484, "completions/min_length": 44.25, "epoch": 0.5822784810126582, "grad_norm": 1.6626837682869604, "kl": 0.328125, "learning_rate": 9.94176257327541e-07, "loss": 0.0004618996172212064, "memory(GiB)": 109.69, "reward": 1.697916716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.697916679084301, "rewards/CineAccuracyORM/std": 0.34448449313640594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 391, "train_speed(iter/s)": 0.029269 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.5, "completions/mean_length": 71.13541984558105, "completions/min_length": 49.25, "epoch": 0.5837676842889055, "grad_norm": 0.006256301899602829, "kl": 0.3310546875, "learning_rate": 9.941402118901742e-07, "loss": 0.0003308586892671883, "memory(GiB)": 109.69, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 392, "train_speed(iter/s)": 0.029276 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 104.25, "completions/mean_length": 68.27083587646484, "completions/min_length": 40.25, "epoch": 0.5852568875651526, "grad_norm": 1.1725240641199706, "kl": 0.3828125, "learning_rate": 9.941040559044761e-07, "loss": 0.002832036931067705, "memory(GiB)": 109.69, "reward": 1.7916666865348816, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.24807433784008026, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 393, "train_speed(iter/s)": 0.029285 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.75, "completions/mean_length": 68.68750190734863, "completions/min_length": 42.25, "epoch": 0.5867460908413998, "grad_norm": 0.005253667646816108, "kl": 0.34765625, "learning_rate": 9.940677893785353e-07, "loss": 0.0003474477562122047, "memory(GiB)": 109.69, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 394, "train_speed(iter/s)": 0.029297 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.5, "completions/mean_length": 71.42708587646484, "completions/min_length": 48.5, "epoch": 0.5882352941176471, "grad_norm": 2.77703347044129, "kl": 0.345703125, "learning_rate": 9.940314123204653e-07, "loss": -0.004364117980003357, "memory(GiB)": 109.69, "reward": 1.6875000298023224, "reward_std": 0.15571126155555248, "rewards/CineAccuracyORM/mean": 0.6875000149011612, "rewards/CineAccuracyORM/std": 0.2947959266602993, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 395, "train_speed(iter/s)": 0.029266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 97.0, "completions/mean_length": 68.84375190734863, "completions/min_length": 47.0, "epoch": 0.5897244973938943, "grad_norm": 1.1996936107730911, "kl": 0.3564453125, "learning_rate": 9.939949247384045e-07, "loss": 0.0031666462309658527, "memory(GiB)": 109.69, "reward": 1.635416716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.635416679084301, "rewards/CineAccuracyORM/std": 0.4456155598163605, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 396, "train_speed(iter/s)": 0.029256 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.5, "completions/mean_length": 66.05208587646484, "completions/min_length": 40.75, "epoch": 0.5912137006701415, "grad_norm": 0.005978507193007681, "kl": 0.37451171875, "learning_rate": 9.939583266405157e-07, "loss": 0.0003737526130862534, "memory(GiB)": 109.69, "reward": 1.2500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.2500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 397, "train_speed(iter/s)": 0.029236 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.75, "completions/mean_length": 69.60416793823242, "completions/min_length": 47.25, "epoch": 0.5927029039463887, "grad_norm": 1.5066177980229485, "kl": 0.34912109375, "learning_rate": 9.939216180349863e-07, "loss": 0.00392397865653038, "memory(GiB)": 109.69, "reward": 1.697916716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.697916679084301, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 398, "train_speed(iter/s)": 0.029227 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.25, "completions/mean_length": 67.36458683013916, "completions/min_length": 40.0, "epoch": 0.5941921072226359, "grad_norm": 2.843102525055044, "kl": 0.359375, "learning_rate": 9.93884798930029e-07, "loss": 0.004893853794783354, "memory(GiB)": 109.69, "reward": 1.6145833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6145833432674408, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 399, "train_speed(iter/s)": 0.029223 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 91.25, "completions/mean_length": 63.84375286102295, "completions/min_length": 43.25, "epoch": 0.5956813104988831, "grad_norm": 1.1522380416840743, "kl": 0.37353515625, "learning_rate": 9.938478693338811e-07, "loss": -0.0026009660214185715, "memory(GiB)": 109.69, "reward": 1.6458333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6458333507180214, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 400, "train_speed(iter/s)": 0.029203 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 101.25, "completions/mean_length": 65.56250190734863, "completions/min_length": 41.5, "epoch": 0.5971705137751303, "grad_norm": 0.9742827095438011, "kl": 0.3681640625, "learning_rate": 9.938108292548044e-07, "loss": -0.010752650909125805, "memory(GiB)": 109.69, "reward": 1.5208334028720856, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.5208333507180214, "rewards/CineAccuracyORM/std": 0.4870600998401642, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 401, "train_speed(iter/s)": 0.029211 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 101.75, "completions/mean_length": 66.27083587646484, "completions/min_length": 46.0, "epoch": 0.5986597170513775, "grad_norm": 1.165073379723089, "kl": 0.37744140625, "learning_rate": 9.937736787010849e-07, "loss": 0.0010713229421526194, "memory(GiB)": 109.69, "reward": 1.4895833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.4895833432674408, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 402, "train_speed(iter/s)": 0.029152 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 93.0, "completions/mean_length": 65.61458396911621, "completions/min_length": 43.75, "epoch": 0.6001489203276247, "grad_norm": 1.8399525425566365, "kl": 0.36181640625, "learning_rate": 9.937364176810346e-07, "loss": 0.007190527394413948, "memory(GiB)": 109.69, "reward": 1.6458333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6458333507180214, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 403, "train_speed(iter/s)": 0.029182 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.25, "completions/mean_length": 69.92708778381348, "completions/min_length": 45.0, "epoch": 0.6016381236038719, "grad_norm": 1.435055053125416, "kl": 0.3623046875, "learning_rate": 9.936990462029887e-07, "loss": 0.0036949957720935345, "memory(GiB)": 109.69, "reward": 1.5208334028720856, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.5208333507180214, "rewards/CineAccuracyORM/std": 0.4870600998401642, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 404, "train_speed(iter/s)": 0.029203 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 105.25, "completions/mean_length": 70.15625190734863, "completions/min_length": 46.25, "epoch": 0.6031273268801192, "grad_norm": 1.8907834649816573, "kl": 0.37060546875, "learning_rate": 9.936615642753086e-07, "loss": 0.0014626914635300636, "memory(GiB)": 109.69, "reward": 1.8020833432674408, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.20484384894371033, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 405, "train_speed(iter/s)": 0.029191 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 107.5, "completions/mean_length": 69.01042079925537, "completions/min_length": 45.25, "epoch": 0.6046165301563663, "grad_norm": 1.598591759631849, "kl": 0.3447265625, "learning_rate": 9.936239719063794e-07, "loss": 0.0006741317920386791, "memory(GiB)": 109.69, "reward": 1.5208334028720856, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.5208333507180214, "rewards/CineAccuracyORM/std": 0.4870600998401642, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 406, "train_speed(iter/s)": 0.029156 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 104.5, "completions/mean_length": 67.48958587646484, "completions/min_length": 47.75, "epoch": 0.6061057334326135, "grad_norm": 1.3676419371829664, "kl": 0.3701171875, "learning_rate": 9.935862691046113e-07, "loss": 0.0014915191568434238, "memory(GiB)": 109.69, "reward": 1.8541666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 407, "train_speed(iter/s)": 0.029146 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 97.0, "completions/mean_length": 66.9166669845581, "completions/min_length": 42.5, "epoch": 0.6075949367088608, "grad_norm": 1.5342248078087908, "kl": 0.36572265625, "learning_rate": 9.93548455878439e-07, "loss": -0.0009304677951149642, "memory(GiB)": 109.69, "reward": 1.8541666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 408, "train_speed(iter/s)": 0.029155 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 97.75, "completions/mean_length": 69.30208492279053, "completions/min_length": 44.5, "epoch": 0.609084139985108, "grad_norm": 3.2792757124691936, "kl": 0.359375, "learning_rate": 9.93510532236322e-07, "loss": -0.0005830636364407837, "memory(GiB)": 109.69, "reward": 1.7916667461395264, "reward_std": 0.07715167850255966, "rewards/CineAccuracyORM/mean": 0.7916666865348816, "rewards/CineAccuracyORM/std": 0.381936639547348, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 409, "train_speed(iter/s)": 0.029165 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 96.75, "completions/mean_length": 68.27083587646484, "completions/min_length": 41.25, "epoch": 0.6105733432613552, "grad_norm": 0.0053102421233328706, "kl": 0.35400390625, "learning_rate": 9.934724981867446e-07, "loss": 0.00035332361585460603, "memory(GiB)": 109.69, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 410, "train_speed(iter/s)": 0.029151 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.0, "completions/mean_length": 69.72916984558105, "completions/min_length": 45.75, "epoch": 0.6120625465376024, "grad_norm": 2.2809422740639196, "kl": 0.35888671875, "learning_rate": 9.934343537382158e-07, "loss": 0.0032367396634072065, "memory(GiB)": 109.69, "reward": 1.4895834028720856, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.4895833507180214, "rewards/CineAccuracyORM/std": 0.4703870266675949, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 411, "train_speed(iter/s)": 0.029161 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 107.25, "completions/mean_length": 70.08333587646484, "completions/min_length": 39.0, "epoch": 0.6135517498138496, "grad_norm": 0.0049753218491603085, "kl": 0.34033203125, "learning_rate": 9.93396098899269e-07, "loss": 0.00033993879333138466, "memory(GiB)": 109.69, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 412, "train_speed(iter/s)": 0.029111 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 98.25, "completions/mean_length": 66.80208778381348, "completions/min_length": 45.25, "epoch": 0.6150409530900968, "grad_norm": 1.1034863118006597, "kl": 0.341796875, "learning_rate": 9.933577336784628e-07, "loss": 0.003409720491617918, "memory(GiB)": 109.69, "reward": 1.791666716337204, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7916666865348816, "rewards/CineAccuracyORM/std": 0.33594508469104767, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 413, "train_speed(iter/s)": 0.029139 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.25, "completions/mean_length": 68.03125190734863, "completions/min_length": 41.75, "epoch": 0.616530156366344, "grad_norm": 0.005386746734514771, "kl": 0.35693359375, "learning_rate": 9.933192580843798e-07, "loss": 0.0003566804516594857, "memory(GiB)": 109.69, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 414, "train_speed(iter/s)": 0.029128 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.25, "completions/mean_length": 67.93750381469727, "completions/min_length": 42.25, "epoch": 0.6180193596425912, "grad_norm": 0.0047194140632760475, "kl": 0.36083984375, "learning_rate": 9.932806721256284e-07, "loss": 0.0003608259721659124, "memory(GiB)": 109.69, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 415, "train_speed(iter/s)": 0.029155 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 102.5, "completions/mean_length": 70.30208396911621, "completions/min_length": 36.75, "epoch": 0.6195085629188384, "grad_norm": 2.279792499201208, "kl": 0.33154296875, "learning_rate": 9.932419758108403e-07, "loss": -0.00471582543104887, "memory(GiB)": 109.69, "reward": 1.5000000596046448, "reward_std": 0.11116771958768368, "rewards/CineAccuracyORM/mean": 0.5000000102445483, "rewards/CineAccuracyORM/std": 0.38177741691470146, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 416, "train_speed(iter/s)": 0.029165 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 96.5, "completions/mean_length": 68.25000190734863, "completions/min_length": 48.25, "epoch": 0.6209977661950856, "grad_norm": 1.3649245528663556, "kl": 0.37939453125, "learning_rate": 9.93203169148673e-07, "loss": -0.0031228442676365376, "memory(GiB)": 109.69, "reward": 1.2916666865348816, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.2916666716337204, "rewards/CineAccuracyORM/std": 0.36846019327640533, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 417, "train_speed(iter/s)": 0.029169 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 99.0, "completions/mean_length": 70.57291984558105, "completions/min_length": 46.5, "epoch": 0.6224869694713329, "grad_norm": 0.005393797514072114, "kl": 0.33056640625, "learning_rate": 9.931642521478079e-07, "loss": 0.0003309114836156368, "memory(GiB)": 109.69, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 418, "train_speed(iter/s)": 0.029195 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 102.0, "completions/mean_length": 70.72916984558105, "completions/min_length": 44.25, "epoch": 0.62397617274758, "grad_norm": 2.1116704058997673, "kl": 0.32421875, "learning_rate": 9.931252248169517e-07, "loss": 0.014389682561159134, "memory(GiB)": 109.69, "reward": 1.5520833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.5520833432674408, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 419, "train_speed(iter/s)": 0.02921 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.0, "completions/mean_length": 68.21875381469727, "completions/min_length": 45.5, "epoch": 0.6254653760238272, "grad_norm": 0.005564753279952304, "kl": 0.37353515625, "learning_rate": 9.930860871648355e-07, "loss": 0.0003741420805454254, "memory(GiB)": 109.69, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 420, "train_speed(iter/s)": 0.029196 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 102.5, "completions/mean_length": 68.61458778381348, "completions/min_length": 46.0, "epoch": 0.6269545793000745, "grad_norm": 2.609924638532079, "kl": 0.36083984375, "learning_rate": 9.930468392002153e-07, "loss": 0.003241155995056033, "memory(GiB)": 109.69, "reward": 1.7916666865348816, "reward_std": 0.07715167850255966, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.24807433784008026, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 421, "train_speed(iter/s)": 0.02918 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 104.5, "completions/mean_length": 68.42708587646484, "completions/min_length": 43.0, "epoch": 0.6284437825763217, "grad_norm": 1.3310711805877864, "kl": 0.35498046875, "learning_rate": 9.930074809318714e-07, "loss": 0.005862519145011902, "memory(GiB)": 109.69, "reward": 1.7083333730697632, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.7083333507180214, "rewards/CineAccuracyORM/std": 0.27512967213988304, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 422, "train_speed(iter/s)": 0.029152 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 98.0, "completions/mean_length": 67.20833587646484, "completions/min_length": 42.25, "epoch": 0.6299329858525688, "grad_norm": 2.897417110017556, "kl": 0.36376953125, "learning_rate": 9.92968012368609e-07, "loss": 0.008487120270729065, "memory(GiB)": 109.69, "reward": 1.7500000298023224, "reward_std": 0.12483403459191322, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3568698540329933, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 423, "train_speed(iter/s)": 0.029132 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 107.25, "completions/mean_length": 67.88541984558105, "completions/min_length": 42.25, "epoch": 0.6314221891288161, "grad_norm": 2.3739204493999013, "kl": 0.34521484375, "learning_rate": 9.929284335192576e-07, "loss": -0.00011366605758666992, "memory(GiB)": 109.69, "reward": 1.5520833730697632, "reward_std": 0.07400632463395596, "rewards/CineAccuracyORM/mean": 0.5520833432674408, "rewards/CineAccuracyORM/std": 0.4920940324664116, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 424, "train_speed(iter/s)": 0.029139 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 116.5, "completions/mean_length": 72.05208587646484, "completions/min_length": 46.5, "epoch": 0.6329113924050633, "grad_norm": 0.005436917411160659, "kl": 0.3701171875, "learning_rate": 9.928887443926725e-07, "loss": 0.0003704245900735259, "memory(GiB)": 109.69, "reward": 1.416666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.416666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 425, "train_speed(iter/s)": 0.029123 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 94.0, "completions/mean_length": 67.80208778381348, "completions/min_length": 40.0, "epoch": 0.6344005956813105, "grad_norm": 0.006297587276661003, "kl": 0.37353515625, "learning_rate": 9.928489449977323e-07, "loss": 0.0003733660269062966, "memory(GiB)": 109.69, "reward": 1.416666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.416666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 426, "train_speed(iter/s)": 0.029131 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 105.5, "completions/mean_length": 67.84375381469727, "completions/min_length": 45.5, "epoch": 0.6358897989575577, "grad_norm": 1.5684519275812783, "kl": 0.3720703125, "learning_rate": 9.92809035343341e-07, "loss": 0.001309490529820323, "memory(GiB)": 109.69, "reward": 1.541666716337204, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.5416666865348816, "rewards/CineAccuracyORM/std": 0.45633094012737274, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 427, "train_speed(iter/s)": 0.029086 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 97.75, "completions/mean_length": 67.57291984558105, "completions/min_length": 42.75, "epoch": 0.637379002233805, "grad_norm": 0.005707069764962157, "kl": 0.38232421875, "learning_rate": 9.927690154384272e-07, "loss": 0.00038215675158426166, "memory(GiB)": 109.69, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 428, "train_speed(iter/s)": 0.029095 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 92.5, "completions/mean_length": 68.12500381469727, "completions/min_length": 40.25, "epoch": 0.6388682055100521, "grad_norm": 2.4993772995836996, "kl": 0.37646484375, "learning_rate": 9.927288852919442e-07, "loss": -0.01117224246263504, "memory(GiB)": 109.69, "reward": 1.9062500298023224, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.18665887415409088, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 429, "train_speed(iter/s)": 0.029089 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.5, "completions/mean_length": 64.90625190734863, "completions/min_length": 43.25, "epoch": 0.6403574087862993, "grad_norm": 0.005910151752597627, "kl": 0.3818359375, "learning_rate": 9.926886449128696e-07, "loss": 0.00038153957575559616, "memory(GiB)": 109.69, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 430, "train_speed(iter/s)": 0.029081 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 85.75, "completions/mean_length": 63.78125190734863, "completions/min_length": 41.5, "epoch": 0.6418466120625466, "grad_norm": 1.183871784248123, "kl": 0.396484375, "learning_rate": 9.92648294310206e-07, "loss": -0.000773103442043066, "memory(GiB)": 109.69, "reward": 1.510416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5104166744276881, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 431, "train_speed(iter/s)": 0.029073 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 98.5, "completions/mean_length": 63.760419845581055, "completions/min_length": 39.75, "epoch": 0.6433358153387937, "grad_norm": 2.4996115729451596, "kl": 0.39208984375, "learning_rate": 9.926078334929806e-07, "loss": 0.0017438861541450024, "memory(GiB)": 109.69, "reward": 1.96875, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.96875, "rewards/CineAccuracyORM/std": 0.08445799350738525, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 432, "train_speed(iter/s)": 0.029072 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 89.0, "completions/mean_length": 63.94791889190674, "completions/min_length": 44.25, "epoch": 0.6448250186150409, "grad_norm": 2.6095179240083874, "kl": 0.380859375, "learning_rate": 9.925672624702452e-07, "loss": 0.0027205748483538628, "memory(GiB)": 109.69, "reward": 1.8750000298023224, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.8750000149011612, "rewards/CineAccuracyORM/std": 0.2155592292547226, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 433, "train_speed(iter/s)": 0.029064 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 96.75, "completions/mean_length": 65.15625190734863, "completions/min_length": 44.25, "epoch": 0.6463142218912882, "grad_norm": 1.8601227410623065, "kl": 0.39501953125, "learning_rate": 9.925265812510764e-07, "loss": 0.010990663431584835, "memory(GiB)": 109.69, "reward": 1.5937500298023224, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.5937500074505806, "rewards/CineAccuracyORM/std": 0.4749870151281357, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 434, "train_speed(iter/s)": 0.029056 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 94.5, "completions/mean_length": 62.604169845581055, "completions/min_length": 38.0, "epoch": 0.6478034251675354, "grad_norm": 0.005707710270675859, "kl": 0.39306640625, "learning_rate": 9.924857898445753e-07, "loss": 0.00039274568553082645, "memory(GiB)": 109.69, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 435, "train_speed(iter/s)": 0.029054 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 98.75, "completions/mean_length": 63.82291889190674, "completions/min_length": 40.0, "epoch": 0.6492926284437825, "grad_norm": 2.4305209861378367, "kl": 0.38037109375, "learning_rate": 9.924448882598678e-07, "loss": -0.008921308442950249, "memory(GiB)": 109.69, "reward": 1.7187500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500074505806, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 436, "train_speed(iter/s)": 0.029063 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 90.0, "completions/mean_length": 59.76041889190674, "completions/min_length": 39.75, "epoch": 0.6507818317200298, "grad_norm": 0.006299600807084033, "kl": 0.38232421875, "learning_rate": 9.92403876506104e-07, "loss": 0.0003823398146778345, "memory(GiB)": 109.69, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 437, "train_speed(iter/s)": 0.029053 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 97.25, "completions/mean_length": 61.44791793823242, "completions/min_length": 38.75, "epoch": 0.652271034996277, "grad_norm": 0.006205582990440889, "kl": 0.38623046875, "learning_rate": 9.923627545924593e-07, "loss": 0.0003857490955851972, "memory(GiB)": 109.69, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 438, "train_speed(iter/s)": 0.029048 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 94.25, "completions/mean_length": 59.57291793823242, "completions/min_length": 35.75, "epoch": 0.6537602382725242, "grad_norm": 0.006018385140259104, "kl": 0.43994140625, "learning_rate": 9.923215225281335e-07, "loss": 0.00044097809586673975, "memory(GiB)": 109.69, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 439, "train_speed(iter/s)": 0.029023 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 90.25, "completions/mean_length": 64.43750190734863, "completions/min_length": 39.25, "epoch": 0.6552494415487714, "grad_norm": 2.563995358873083, "kl": 0.39990234375, "learning_rate": 9.922801803223504e-07, "loss": -0.0014009479200467467, "memory(GiB)": 109.69, "reward": 1.7083333730697632, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7083333507180214, "rewards/CineAccuracyORM/std": 0.33594508469104767, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 440, "train_speed(iter/s)": 0.028988 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 95.75, "completions/mean_length": 63.552085876464844, "completions/min_length": 39.0, "epoch": 0.6567386448250186, "grad_norm": 0.005849558923329354, "kl": 0.42138671875, "learning_rate": 9.922387279843598e-07, "loss": 0.00042159995064139366, "memory(GiB)": 109.69, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 441, "train_speed(iter/s)": 0.028943 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 86.0, "completions/mean_length": 61.031250953674316, "completions/min_length": 40.75, "epoch": 0.6582278481012658, "grad_norm": 1.7574335805316834, "kl": 0.3828125, "learning_rate": 9.92197165523435e-07, "loss": 0.00037908085505478084, "memory(GiB)": 109.69, "reward": 1.6875000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000149011612, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 442, "train_speed(iter/s)": 0.028954 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 98.5, "completions/mean_length": 63.156250953674316, "completions/min_length": 40.0, "epoch": 0.659717051377513, "grad_norm": 2.4593508067357064, "kl": 1.56494140625, "learning_rate": 9.92155492948874e-07, "loss": 0.007221059408038855, "memory(GiB)": 109.69, "reward": 1.7187500298023224, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.7187500074505806, "rewards/CineAccuracyORM/std": 0.31460215896368027, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 443, "train_speed(iter/s)": 0.02898 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 97.5, "completions/mean_length": 60.27083396911621, "completions/min_length": 39.75, "epoch": 0.6612062546537603, "grad_norm": 2.5469918876292827, "kl": 0.37744140625, "learning_rate": 9.921137102700002e-07, "loss": -0.00048584816977381706, "memory(GiB)": 109.69, "reward": 1.8020833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 444, "train_speed(iter/s)": 0.029008 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 94.0, "completions/mean_length": 63.29166793823242, "completions/min_length": 38.5, "epoch": 0.6626954579300074, "grad_norm": 0.8480144471792377, "kl": 0.404296875, "learning_rate": 9.92071817496161e-07, "loss": -0.012287060730159283, "memory(GiB)": 109.69, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 0.9895833432674408, "rewards/Format/std": 0.05103103443980217, "step": 445, "train_speed(iter/s)": 0.028969 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 86.25, "completions/mean_length": 62.21875190734863, "completions/min_length": 40.75, "epoch": 0.6641846612062546, "grad_norm": 2.0518117212461315, "kl": 0.38134765625, "learning_rate": 9.920298146367286e-07, "loss": -0.00924745760858059, "memory(GiB)": 109.69, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 446, "train_speed(iter/s)": 0.02898 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 99.25, "completions/mean_length": 66.60416984558105, "completions/min_length": 44.75, "epoch": 0.6656738644825019, "grad_norm": 0.005947990098072252, "kl": 0.37353515625, "learning_rate": 9.919877017010995e-07, "loss": 0.00037293456261977553, "memory(GiB)": 109.69, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 447, "train_speed(iter/s)": 0.028981 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 94.0, "completions/mean_length": 65.86458683013916, "completions/min_length": 45.75, "epoch": 0.6671630677587491, "grad_norm": 0.005916334625613139, "kl": 0.36181640625, "learning_rate": 9.919454786986959e-07, "loss": 0.00036151905078440905, "memory(GiB)": 109.69, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 448, "train_speed(iter/s)": 0.02899 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 94.0, "completions/mean_length": 65.92708492279053, "completions/min_length": 40.0, "epoch": 0.6686522710349962, "grad_norm": 1.1214723295293447, "kl": 0.3720703125, "learning_rate": 9.91903145638963e-07, "loss": 0.010686945170164108, "memory(GiB)": 109.69, "reward": 1.6145834028720856, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6145833507180214, "rewards/CineAccuracyORM/std": 0.464870348572731, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 449, "train_speed(iter/s)": 0.028987 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 91.5, "completions/mean_length": 62.937500953674316, "completions/min_length": 40.5, "epoch": 0.6701414743112435, "grad_norm": 0.005811926026615889, "kl": 0.37353515625, "learning_rate": 9.91860702531372e-07, "loss": 0.00037399641587398946, "memory(GiB)": 109.69, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 450, "train_speed(iter/s)": 0.028979 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 108.5, "completions/mean_length": 63.85416889190674, "completions/min_length": 37.0, "epoch": 0.6716306775874907, "grad_norm": 1.6772894593029775, "kl": 0.34814453125, "learning_rate": 9.918181493854183e-07, "loss": -0.002440151060000062, "memory(GiB)": 109.69, "reward": 1.6562500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500223517418, "rewards/CineAccuracyORM/std": 0.4121886007487774, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 451, "train_speed(iter/s)": 0.028975 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.25, "completions/mean_length": 66.00000190734863, "completions/min_length": 35.75, "epoch": 0.6731198808637379, "grad_norm": 1.4068824350162792, "kl": 0.3671875, "learning_rate": 9.917754862106215e-07, "loss": 0.007178551517426968, "memory(GiB)": 109.69, "reward": 1.5937500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500223517418, "rewards/CineAccuracyORM/std": 0.47723397612571716, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 452, "train_speed(iter/s)": 0.028965 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 97.25, "completions/mean_length": 64.73958587646484, "completions/min_length": 47.25, "epoch": 0.6746090841399851, "grad_norm": 0.0063107665814623225, "kl": 0.37255859375, "learning_rate": 9.917327130165262e-07, "loss": 0.0003718466032296419, "memory(GiB)": 109.69, "reward": 1.5000000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 453, "train_speed(iter/s)": 0.028957 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 85.0, "completions/mean_length": 65.47916889190674, "completions/min_length": 45.25, "epoch": 0.6760982874162323, "grad_norm": 0.005717931525788821, "kl": 0.38134765625, "learning_rate": 9.916898298127017e-07, "loss": 0.00038177071837708354, "memory(GiB)": 109.69, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 454, "train_speed(iter/s)": 0.028984 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 95.25, "completions/mean_length": 67.41666984558105, "completions/min_length": 43.75, "epoch": 0.6775874906924795, "grad_norm": 0.006578224496051033, "kl": 0.38623046875, "learning_rate": 9.916468366087416e-07, "loss": 0.0003859233111143112, "memory(GiB)": 109.69, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 455, "train_speed(iter/s)": 0.028999 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 95.75, "completions/mean_length": 66.32291793823242, "completions/min_length": 42.25, "epoch": 0.6790766939687267, "grad_norm": 0.006211335686944857, "kl": 0.3564453125, "learning_rate": 9.916037334142646e-07, "loss": 0.0003560362383723259, "memory(GiB)": 109.69, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 456, "train_speed(iter/s)": 0.028997 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 93.5, "completions/mean_length": 66.76041889190674, "completions/min_length": 44.25, "epoch": 0.680565897244974, "grad_norm": 0.006098050574362732, "kl": 0.35205078125, "learning_rate": 9.915605202389132e-07, "loss": 0.0003522754996083677, "memory(GiB)": 109.69, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 457, "train_speed(iter/s)": 0.029005 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 97.5, "completions/mean_length": 67.26041984558105, "completions/min_length": 47.25, "epoch": 0.6820551005212211, "grad_norm": 0.005877371886400264, "kl": 0.34912109375, "learning_rate": 9.915171970923556e-07, "loss": 0.0003497542056720704, "memory(GiB)": 109.69, "reward": 1.5000000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000074505806, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 458, "train_speed(iter/s)": 0.029012 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 97.0, "completions/mean_length": 69.88541793823242, "completions/min_length": 46.0, "epoch": 0.6835443037974683, "grad_norm": 1.8216187490044642, "kl": 0.35546875, "learning_rate": 9.914737639842832e-07, "loss": 0.0014504207065328956, "memory(GiB)": 109.69, "reward": 1.7291666865348816, "reward_std": 0.0725918859243393, "rewards/CineAccuracyORM/mean": 0.7291666716337204, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 459, "train_speed(iter/s)": 0.029008 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 99.5, "completions/mean_length": 68.67708587646484, "completions/min_length": 45.25, "epoch": 0.6850335070737156, "grad_norm": 0.9099660673405044, "kl": 0.3896484375, "learning_rate": 9.914302209244135e-07, "loss": 4.586420254781842e-05, "memory(GiB)": 109.69, "reward": 1.4375000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.4375000074505806, "rewards/CineAccuracyORM/std": 0.4717390313744545, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 460, "train_speed(iter/s)": 0.028984 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 102.25, "completions/mean_length": 69.02083396911621, "completions/min_length": 43.75, "epoch": 0.6865227103499628, "grad_norm": 1.421687990264279, "kl": 0.376953125, "learning_rate": 9.913865679224875e-07, "loss": -0.014082828536629677, "memory(GiB)": 109.69, "reward": 1.6979166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 461, "train_speed(iter/s)": 0.028987 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.75, "completions/mean_length": 70.76041984558105, "completions/min_length": 45.75, "epoch": 0.68801191362621, "grad_norm": 2.333473950486944, "kl": 0.33349609375, "learning_rate": 9.913428049882714e-07, "loss": 0.0039159758016467094, "memory(GiB)": 109.69, "reward": 1.7812500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7812500074505806, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 462, "train_speed(iter/s)": 0.029006 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 102.25, "completions/mean_length": 69.82291984558105, "completions/min_length": 45.5, "epoch": 0.6895011169024572, "grad_norm": 2.486430201407791, "kl": 0.3310546875, "learning_rate": 9.912989321315557e-07, "loss": 0.010056126862764359, "memory(GiB)": 109.69, "reward": 1.7604166865348816, "reward_std": 0.06803862750530243, "rewards/CineAccuracyORM/mean": 0.760416679084301, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 463, "train_speed(iter/s)": 0.029031 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 97.25, "completions/mean_length": 69.23958587646484, "completions/min_length": 41.75, "epoch": 0.6909903201787044, "grad_norm": 1.0855935798686083, "kl": 0.357421875, "learning_rate": 9.912549493621554e-07, "loss": -0.00016892608255147934, "memory(GiB)": 109.69, "reward": 1.5729166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5729166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 464, "train_speed(iter/s)": 0.02904 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 99.25, "completions/mean_length": 67.87500381469727, "completions/min_length": 40.75, "epoch": 0.6924795234549516, "grad_norm": 2.2322339475694073, "kl": 0.3662109375, "learning_rate": 9.912108566899106e-07, "loss": 0.00658311415463686, "memory(GiB)": 109.69, "reward": 1.8125000298023224, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.8125000074505806, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 465, "train_speed(iter/s)": 0.029049 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.75, "completions/mean_length": 72.36458396911621, "completions/min_length": 42.5, "epoch": 0.6939687267311988, "grad_norm": 0.005488692404577618, "kl": 0.35302734375, "learning_rate": 9.911666541246855e-07, "loss": 0.0003532016125973314, "memory(GiB)": 109.69, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 466, "train_speed(iter/s)": 0.029056 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.0, "completions/mean_length": 68.50000381469727, "completions/min_length": 47.0, "epoch": 0.695457930007446, "grad_norm": 2.0763355155380405, "kl": 0.37158203125, "learning_rate": 9.911223416763687e-07, "loss": -0.001996101811528206, "memory(GiB)": 109.69, "reward": 1.5104167461395264, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.5104166865348816, "rewards/CineAccuracyORM/std": 0.48275065422058105, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 467, "train_speed(iter/s)": 0.029054 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 102.25, "completions/mean_length": 68.68750381469727, "completions/min_length": 46.0, "epoch": 0.6969471332836932, "grad_norm": 0.03574197198223806, "kl": 0.4013671875, "learning_rate": 9.910779193548744e-07, "loss": 0.00040169942076317966, "memory(GiB)": 109.69, "reward": 1.5000000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 468, "train_speed(iter/s)": 0.029046 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 96.75, "completions/mean_length": 69.36458396911621, "completions/min_length": 44.75, "epoch": 0.6984363365599404, "grad_norm": 1.7757575778084225, "kl": 0.34033203125, "learning_rate": 9.910333871701401e-07, "loss": 0.002023501554504037, "memory(GiB)": 109.69, "reward": 1.4270833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.4270833432674408, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 469, "train_speed(iter/s)": 0.029023 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 102.0, "completions/mean_length": 73.53125190734863, "completions/min_length": 46.75, "epoch": 0.6999255398361877, "grad_norm": 2.749188705592424, "kl": 0.3564453125, "learning_rate": 9.909887451321287e-07, "loss": 0.004765661433339119, "memory(GiB)": 109.69, "reward": 1.6458333730697632, "reward_std": 0.09750141017138958, "rewards/CineAccuracyORM/mean": 0.6458333432674408, "rewards/CineAccuracyORM/std": 0.3712640330195427, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 470, "train_speed(iter/s)": 0.02898 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 96.0, "completions/mean_length": 68.28125190734863, "completions/min_length": 42.0, "epoch": 0.7014147431124349, "grad_norm": 1.7486113507099235, "kl": 0.32958984375, "learning_rate": 9.909439932508274e-07, "loss": -0.011177964508533478, "memory(GiB)": 109.69, "reward": 1.635416716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.635416679084301, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 471, "train_speed(iter/s)": 0.029005 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 107.0, "completions/mean_length": 73.45833587646484, "completions/min_length": 49.75, "epoch": 0.702903946388682, "grad_norm": 0.0063688305890759215, "kl": 0.34814453125, "learning_rate": 9.908991315362483e-07, "loss": 0.00034783093724399805, "memory(GiB)": 109.69, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 472, "train_speed(iter/s)": 0.02898 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 94.0, "completions/mean_length": 67.93750190734863, "completions/min_length": 47.5, "epoch": 0.7043931496649293, "grad_norm": 0.006475472977771319, "kl": 0.37353515625, "learning_rate": 9.908541599984275e-07, "loss": 0.0003735580248758197, "memory(GiB)": 109.69, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 473, "train_speed(iter/s)": 0.02898 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 105.25, "completions/mean_length": 70.52083492279053, "completions/min_length": 41.75, "epoch": 0.7058823529411765, "grad_norm": 0.006132357505652655, "kl": 0.3310546875, "learning_rate": 9.908090786474258e-07, "loss": 0.0003308887535240501, "memory(GiB)": 109.69, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 474, "train_speed(iter/s)": 0.028989 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 108.75, "completions/mean_length": 69.92708587646484, "completions/min_length": 44.5, "epoch": 0.7073715562174236, "grad_norm": 0.01243998986702459, "kl": 0.3359375, "learning_rate": 9.907638874933292e-07, "loss": 0.0003356006636749953, "memory(GiB)": 109.69, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 475, "train_speed(iter/s)": 0.029014 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.75, "completions/mean_length": 74.61458587646484, "completions/min_length": 53.5, "epoch": 0.7088607594936709, "grad_norm": 0.005843523996563713, "kl": 0.34326171875, "learning_rate": 9.907185865462475e-07, "loss": 0.00034282912383787334, "memory(GiB)": 109.69, "reward": 1.5000000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000074505806, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 476, "train_speed(iter/s)": 0.029032 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.25, "completions/mean_length": 75.32291793823242, "completions/min_length": 46.0, "epoch": 0.7103499627699181, "grad_norm": 0.9439897853577076, "kl": 0.32568359375, "learning_rate": 9.906731758163154e-07, "loss": 0.043720345944166183, "memory(GiB)": 109.69, "reward": 1.4895833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 0.9895833432674408, "rewards/Format/std": 0.05103103443980217, "step": 477, "train_speed(iter/s)": 0.02902 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.25, "completions/mean_length": 70.02083396911621, "completions/min_length": 39.75, "epoch": 0.7118391660461653, "grad_norm": 1.7714957434741758, "kl": 0.3388671875, "learning_rate": 9.906276553136922e-07, "loss": -0.003491933224722743, "memory(GiB)": 109.69, "reward": 1.6770834028720856, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.677083358168602, "rewards/CineAccuracyORM/std": 0.47723397612571716, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 478, "train_speed(iter/s)": 0.02901 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.25, "completions/mean_length": 72.50000190734863, "completions/min_length": 46.0, "epoch": 0.7133283693224125, "grad_norm": 1.7318488192257555, "kl": 0.3623046875, "learning_rate": 9.905820250485617e-07, "loss": -0.0014997029211372137, "memory(GiB)": 109.69, "reward": 1.6875000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000149011612, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 479, "train_speed(iter/s)": 0.029012 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.75, "completions/mean_length": 70.09375381469727, "completions/min_length": 46.25, "epoch": 0.7148175725986597, "grad_norm": 1.3912840488809841, "kl": 0.34228515625, "learning_rate": 9.905362850311322e-07, "loss": 0.011358723044395447, "memory(GiB)": 109.69, "reward": 1.4895833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.4895833432674408, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 480, "train_speed(iter/s)": 0.02902 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 112.25, "completions/mean_length": 72.76041793823242, "completions/min_length": 44.25, "epoch": 0.7163067758749069, "grad_norm": 0.005958445118547403, "kl": 0.3173828125, "learning_rate": 9.904904352716363e-07, "loss": 0.000317379308398813, "memory(GiB)": 109.69, "reward": 1.4166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.4166666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 481, "train_speed(iter/s)": 0.029028 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.75, "completions/mean_length": 72.88541984558105, "completions/min_length": 45.75, "epoch": 0.7177959791511541, "grad_norm": 1.4727204582062636, "kl": 0.32568359375, "learning_rate": 9.90444475780332e-07, "loss": -0.00916432123631239, "memory(GiB)": 109.69, "reward": 1.6145833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6145833432674408, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 482, "train_speed(iter/s)": 0.029008 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.5, "completions/mean_length": 75.43750190734863, "completions/min_length": 50.5, "epoch": 0.7192851824274014, "grad_norm": 0.005770597866734011, "kl": 0.33740234375, "learning_rate": 9.903984065675009e-07, "loss": 0.0003369557380210608, "memory(GiB)": 109.69, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 483, "train_speed(iter/s)": 0.029014 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.0, "completions/mean_length": 77.26041984558105, "completions/min_length": 49.75, "epoch": 0.7207743857036486, "grad_norm": 1.3927719524361941, "kl": 0.33447265625, "learning_rate": 9.903522276434495e-07, "loss": 0.0006804857403039932, "memory(GiB)": 109.69, "reward": 1.8125000298023224, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.32092025876045227, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 484, "train_speed(iter/s)": 0.029001 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.5, "completions/mean_length": 75.30208778381348, "completions/min_length": 48.25, "epoch": 0.7222635889798957, "grad_norm": 0.005971344733532083, "kl": 0.326171875, "learning_rate": 9.903059390185093e-07, "loss": 0.0003258895012550056, "memory(GiB)": 109.69, "reward": 1.416666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.416666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 485, "train_speed(iter/s)": 0.029026 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 112.5, "completions/mean_length": 75.44791984558105, "completions/min_length": 47.75, "epoch": 0.723752792256143, "grad_norm": 1.3306453869269999, "kl": 0.31884765625, "learning_rate": 9.902595407030355e-07, "loss": 0.00874620396643877, "memory(GiB)": 109.69, "reward": 1.90625, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.12363383919000626, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 486, "train_speed(iter/s)": 0.029049 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.75, "completions/mean_length": 77.44792175292969, "completions/min_length": 48.25, "epoch": 0.7252419955323902, "grad_norm": 0.005554320843861757, "kl": 0.341796875, "learning_rate": 9.902130327074084e-07, "loss": 0.0003416425024624914, "memory(GiB)": 109.69, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 487, "train_speed(iter/s)": 0.029072 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 113.75, "completions/mean_length": 76.53125190734863, "completions/min_length": 50.5, "epoch": 0.7267311988086373, "grad_norm": 1.68773352613284, "kl": 0.34326171875, "learning_rate": 9.901664150420326e-07, "loss": -0.012158982455730438, "memory(GiB)": 109.69, "reward": 1.6770833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833358168602, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 488, "train_speed(iter/s)": 0.029095 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 105.75, "completions/mean_length": 73.86458587646484, "completions/min_length": 51.75, "epoch": 0.7282204020848846, "grad_norm": 0.006029163553568697, "kl": 0.3408203125, "learning_rate": 9.901196877173374e-07, "loss": 0.00034084124490618706, "memory(GiB)": 109.69, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 489, "train_speed(iter/s)": 0.029097 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 105.0, "completions/mean_length": 75.13541793823242, "completions/min_length": 46.5, "epoch": 0.7297096053611318, "grad_norm": 0.005741409533374832, "kl": 0.32763671875, "learning_rate": 9.900728507437767e-07, "loss": 0.00032776687294244766, "memory(GiB)": 109.69, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 490, "train_speed(iter/s)": 0.02912 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 113.75, "completions/mean_length": 75.27083587646484, "completions/min_length": 43.75, "epoch": 0.731198808637379, "grad_norm": 0.005720008762659998, "kl": 0.33935546875, "learning_rate": 9.900259041318288e-07, "loss": 0.00033889361657202244, "memory(GiB)": 109.69, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 491, "train_speed(iter/s)": 0.029112 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 107.5, "completions/mean_length": 72.11458587646484, "completions/min_length": 46.5, "epoch": 0.7326880119136262, "grad_norm": 2.018909328342689, "kl": 0.3447265625, "learning_rate": 9.899788478919963e-07, "loss": -0.008626895025372505, "memory(GiB)": 109.69, "reward": 1.8958333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 492, "train_speed(iter/s)": 0.029136 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 107.0, "completions/mean_length": 74.85416793823242, "completions/min_length": 51.0, "epoch": 0.7341772151898734, "grad_norm": 0.007009788642170341, "kl": 0.32861328125, "learning_rate": 9.89931682034807e-07, "loss": 0.000328604131937027, "memory(GiB)": 109.69, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 493, "train_speed(iter/s)": 0.02916 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.0, "completions/mean_length": 77.40625381469727, "completions/min_length": 52.0, "epoch": 0.7356664184661206, "grad_norm": 1.0901841586899854, "kl": 0.33056640625, "learning_rate": 9.898844065708121e-07, "loss": -0.001606643432751298, "memory(GiB)": 109.69, "reward": 1.4270833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.4270833432674408, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 494, "train_speed(iter/s)": 0.029184 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.0, "completions/mean_length": 77.85416793823242, "completions/min_length": 56.0, "epoch": 0.7371556217423678, "grad_norm": 0.006614096611787554, "kl": 0.3193359375, "learning_rate": 9.898370215105885e-07, "loss": 0.00031911267433315516, "memory(GiB)": 109.69, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 495, "train_speed(iter/s)": 0.02914 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.0, "completions/mean_length": 75.39583396911621, "completions/min_length": 54.25, "epoch": 0.7386448250186151, "grad_norm": 0.005881802107560495, "kl": 0.33544921875, "learning_rate": 9.89789526864737e-07, "loss": 0.00033563282340765, "memory(GiB)": 109.69, "reward": 1.416666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.416666679084301, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 496, "train_speed(iter/s)": 0.029148 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.25, "completions/mean_length": 81.89583396911621, "completions/min_length": 55.5, "epoch": 0.7401340282948623, "grad_norm": 0.006348966610872311, "kl": 0.33349609375, "learning_rate": 9.89741922643883e-07, "loss": 0.0003334124921821058, "memory(GiB)": 109.69, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 497, "train_speed(iter/s)": 0.02912 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.25, "completions/mean_length": 81.13541793823242, "completions/min_length": 53.25, "epoch": 0.7416232315711094, "grad_norm": 0.006970166714444086, "kl": 0.34521484375, "learning_rate": 9.896942088586765e-07, "loss": 0.00034509756369516253, "memory(GiB)": 109.69, "reward": 1.5000000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 498, "train_speed(iter/s)": 0.029109 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.5, "completions/mean_length": 80.79166984558105, "completions/min_length": 52.75, "epoch": 0.7431124348473567, "grad_norm": 0.00608549585441392, "kl": 0.328125, "learning_rate": 9.896463855197918e-07, "loss": 0.000327774730976671, "memory(GiB)": 109.69, "reward": 1.5000000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 499, "train_speed(iter/s)": 0.029114 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 108.25, "completions/mean_length": 80.44791793823242, "completions/min_length": 49.5, "epoch": 0.7446016381236039, "grad_norm": 1.5782536763785986, "kl": 0.341796875, "learning_rate": 9.89598452637928e-07, "loss": 0.006153746508061886, "memory(GiB)": 109.69, "reward": 1.6770833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833358168602, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 500, "train_speed(iter/s)": 0.029136 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.75, "completions/mean_length": 80.59375190734863, "completions/min_length": 52.5, "epoch": 0.746090841399851, "grad_norm": 0.005909850829988455, "kl": 0.32470703125, "learning_rate": 9.895504102238087e-07, "loss": 0.0003246703709010035, "memory(GiB)": 109.69, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 501, "train_speed(iter/s)": 0.029063 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.0, "completions/mean_length": 82.32291793823242, "completions/min_length": 54.75, "epoch": 0.7475800446760983, "grad_norm": 1.138429034528655, "kl": 0.3095703125, "learning_rate": 9.895022582881817e-07, "loss": -0.006977283861488104, "memory(GiB)": 109.69, "reward": 1.4375000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.4375000074505806, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 502, "train_speed(iter/s)": 0.029043 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.5, "completions/mean_length": 82.35416984558105, "completions/min_length": 55.75, "epoch": 0.7490692479523455, "grad_norm": 1.3404485743878682, "kl": 0.3251953125, "learning_rate": 9.894539968418195e-07, "loss": 0.01871461048722267, "memory(GiB)": 109.69, "reward": 1.635416716337204, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.6354166865348816, "rewards/CineAccuracyORM/std": 0.4023842103779316, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 503, "train_speed(iter/s)": 0.029048 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.0, "completions/mean_length": 81.27083587646484, "completions/min_length": 52.5, "epoch": 0.7505584512285927, "grad_norm": 0.4077213477440035, "kl": 0.40625, "learning_rate": 9.894056258955192e-07, "loss": 0.00040642102248966694, "memory(GiB)": 109.69, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 504, "train_speed(iter/s)": 0.02907 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.0, "completions/mean_length": 83.03125190734863, "completions/min_length": 55.25, "epoch": 0.7520476545048399, "grad_norm": 0.006635377137969476, "kl": 0.3212890625, "learning_rate": 9.89357145460102e-07, "loss": 0.00032132217893376946, "memory(GiB)": 109.69, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 505, "train_speed(iter/s)": 0.029075 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.25, "completions/mean_length": 83.36458587646484, "completions/min_length": 58.0, "epoch": 0.7535368577810871, "grad_norm": 1.58974954115534, "kl": 0.3271484375, "learning_rate": 9.893085555464142e-07, "loss": -0.0128876231610775, "memory(GiB)": 109.69, "reward": 1.6770833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 506, "train_speed(iter/s)": 0.029081 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.0, "completions/mean_length": 82.27083587646484, "completions/min_length": 56.75, "epoch": 0.7550260610573343, "grad_norm": 0.005810311821149232, "kl": 0.3173828125, "learning_rate": 9.89259856165326e-07, "loss": 0.0003173899604007602, "memory(GiB)": 109.69, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 507, "train_speed(iter/s)": 0.029103 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.25, "completions/mean_length": 81.05208587646484, "completions/min_length": 56.25, "epoch": 0.7565152643335815, "grad_norm": 0.005823136250659082, "kl": 0.31201171875, "learning_rate": 9.892110473277327e-07, "loss": 0.0003124707436654717, "memory(GiB)": 109.69, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 508, "train_speed(iter/s)": 0.029125 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.5, "completions/mean_length": 84.43750381469727, "completions/min_length": 59.75, "epoch": 0.7580044676098288, "grad_norm": 0.9151501439668462, "kl": 0.3173828125, "learning_rate": 9.891621290445532e-07, "loss": -0.003916838206350803, "memory(GiB)": 109.69, "reward": 1.604166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.604166679084301, "rewards/CineAccuracyORM/std": 0.4717390313744545, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 509, "train_speed(iter/s)": 0.02914 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.75, "completions/mean_length": 87.03125381469727, "completions/min_length": 56.0, "epoch": 0.759493670886076, "grad_norm": 2.0390156860789697, "kl": 0.3125, "learning_rate": 9.89113101326732e-07, "loss": 0.006236978806555271, "memory(GiB)": 109.69, "reward": 1.635416716337204, "reward_std": 0.06803862750530243, "rewards/CineAccuracyORM/mean": 0.635416679084301, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 510, "train_speed(iter/s)": 0.029116 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.75, "completions/mean_length": 83.26041984558105, "completions/min_length": 57.5, "epoch": 0.7609828741623231, "grad_norm": 1.0488973991839048, "kl": 0.314453125, "learning_rate": 9.890639641852371e-07, "loss": -0.012439916841685772, "memory(GiB)": 109.69, "reward": 1.510416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5104166865348816, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 511, "train_speed(iter/s)": 0.029138 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.5, "completions/mean_length": 85.39583396911621, "completions/min_length": 63.5, "epoch": 0.7624720774385704, "grad_norm": 2.264254356308434, "kl": 0.3115234375, "learning_rate": 9.890147176310617e-07, "loss": -0.0027274335734546185, "memory(GiB)": 109.69, "reward": 1.8125000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 512, "train_speed(iter/s)": 0.029127 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.0, "completions/mean_length": 82.47916984558105, "completions/min_length": 57.5, "epoch": 0.7639612807148176, "grad_norm": 2.4671180410547375, "kl": 0.30908203125, "learning_rate": 9.889653616752231e-07, "loss": 0.002389671979472041, "memory(GiB)": 109.69, "reward": 1.822916716337204, "reward_std": 0.08838834799826145, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.30704472959041595, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 513, "train_speed(iter/s)": 0.029143 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.75, "completions/mean_length": 86.05208587646484, "completions/min_length": 58.75, "epoch": 0.7654504839910647, "grad_norm": 0.006904173072033717, "kl": 0.3310546875, "learning_rate": 9.889158963287627e-07, "loss": 0.00033085577888414264, "memory(GiB)": 109.69, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 514, "train_speed(iter/s)": 0.029119 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.5, "completions/mean_length": 85.55208587646484, "completions/min_length": 59.5, "epoch": 0.766939687267312, "grad_norm": 0.005223724858477912, "kl": 0.3134765625, "learning_rate": 9.888663216027475e-07, "loss": 0.0003133923455607146, "memory(GiB)": 109.69, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 515, "train_speed(iter/s)": 0.029093 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.5, "completions/mean_length": 82.96875190734863, "completions/min_length": 52.0, "epoch": 0.7684288905435592, "grad_norm": 2.672676356676589, "kl": 0.75048828125, "learning_rate": 9.88816637508268e-07, "loss": -0.006284746341407299, "memory(GiB)": 109.69, "reward": 1.6145833730697632, "reward_std": 0.07400632463395596, "rewards/CineAccuracyORM/mean": 0.6145833432674408, "rewards/CineAccuracyORM/std": 0.3641507476568222, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 516, "train_speed(iter/s)": 0.029096 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.75, "completions/mean_length": 86.47916793823242, "completions/min_length": 57.0, "epoch": 0.7699180938198064, "grad_norm": 0.005920188342435465, "kl": 0.31201171875, "learning_rate": 9.887668440564393e-07, "loss": 0.00031236937502399087, "memory(GiB)": 109.69, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 517, "train_speed(iter/s)": 0.029118 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.25, "completions/mean_length": 88.69791984558105, "completions/min_length": 58.0, "epoch": 0.7714072970960536, "grad_norm": 1.0031554996643277, "kl": 0.33935546875, "learning_rate": 9.88716941258401e-07, "loss": -0.006914216093719006, "memory(GiB)": 109.69, "reward": 1.4687500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.4687500149011612, "rewards/CineAccuracyORM/std": 0.4884019047021866, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 518, "train_speed(iter/s)": 0.029089 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.5, "completions/mean_length": 88.28125381469727, "completions/min_length": 60.0, "epoch": 0.7728965003723008, "grad_norm": 2.6475891399763056, "kl": 0.33203125, "learning_rate": 9.886669291253178e-07, "loss": 0.00575556606054306, "memory(GiB)": 109.69, "reward": 1.5937500596046448, "reward_std": 0.11572098359465599, "rewards/CineAccuracyORM/mean": 0.5937500149011612, "rewards/CineAccuracyORM/std": 0.4957045316696167, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 519, "train_speed(iter/s)": 0.029064 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.25, "completions/mean_length": 87.87500381469727, "completions/min_length": 58.25, "epoch": 0.774385703648548, "grad_norm": 0.007010745978856906, "kl": 0.3115234375, "learning_rate": 9.886168076683778e-07, "loss": 0.0003108765813522041, "memory(GiB)": 109.69, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 520, "train_speed(iter/s)": 0.029084 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.0, "completions/mean_length": 85.51042175292969, "completions/min_length": 56.75, "epoch": 0.7758749069247952, "grad_norm": 0.6268727504346058, "kl": 0.34326171875, "learning_rate": 9.885665768987945e-07, "loss": 0.0033153609838336706, "memory(GiB)": 109.69, "reward": 1.572916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.572916679084301, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 521, "train_speed(iter/s)": 0.029061 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.75, "completions/mean_length": 85.52083587646484, "completions/min_length": 58.75, "epoch": 0.7773641102010425, "grad_norm": 0.006805197106118373, "kl": 0.35302734375, "learning_rate": 9.885162368278055e-07, "loss": 0.00035241441219113767, "memory(GiB)": 109.69, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 522, "train_speed(iter/s)": 0.029052 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.5, "completions/mean_length": 86.88541984558105, "completions/min_length": 61.25, "epoch": 0.7788533134772897, "grad_norm": 0.00754671718110196, "kl": 0.32080078125, "learning_rate": 9.884657874666724e-07, "loss": 0.000321154308039695, "memory(GiB)": 109.69, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 523, "train_speed(iter/s)": 0.029057 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.0, "completions/mean_length": 86.98958587646484, "completions/min_length": 61.25, "epoch": 0.7803425167535368, "grad_norm": 2.166519500069029, "kl": 0.31787109375, "learning_rate": 9.884152288266818e-07, "loss": 0.0010403473861515522, "memory(GiB)": 109.69, "reward": 1.5000000298023224, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.2950507290661335, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 524, "train_speed(iter/s)": 0.029076 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.0, "completions/mean_length": 87.55208396911621, "completions/min_length": 56.5, "epoch": 0.7818317200297841, "grad_norm": 0.008305897308175174, "kl": 0.3232421875, "learning_rate": 9.883645609191448e-07, "loss": 0.0003235067706555128, "memory(GiB)": 109.69, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 525, "train_speed(iter/s)": 0.029082 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.0, "completions/mean_length": 87.31250190734863, "completions/min_length": 57.5, "epoch": 0.7833209233060313, "grad_norm": 0.0064715832707388215, "kl": 0.33251953125, "learning_rate": 9.883137837553967e-07, "loss": 0.00033247104147449136, "memory(GiB)": 109.69, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 526, "train_speed(iter/s)": 0.029102 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.25, "completions/mean_length": 87.94791984558105, "completions/min_length": 57.75, "epoch": 0.7848101265822784, "grad_norm": 1.847605738061028, "kl": 0.3447265625, "learning_rate": 9.882628973467972e-07, "loss": -0.011440422385931015, "memory(GiB)": 109.69, "reward": 1.7395833730697632, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.31460215896368027, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 527, "train_speed(iter/s)": 0.029107 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.5, "completions/mean_length": 85.96875190734863, "completions/min_length": 57.75, "epoch": 0.7862993298585257, "grad_norm": 0.012537472549407127, "kl": 0.33740234375, "learning_rate": 9.882119017047306e-07, "loss": 0.00033681391505524516, "memory(GiB)": 109.69, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 528, "train_speed(iter/s)": 0.029112 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.75, "completions/mean_length": 88.86458778381348, "completions/min_length": 60.75, "epoch": 0.7877885331347729, "grad_norm": 0.0066862293505215165, "kl": 0.3291015625, "learning_rate": 9.881607968406052e-07, "loss": 0.0003288326261099428, "memory(GiB)": 109.69, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 529, "train_speed(iter/s)": 0.029112 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.0, "completions/mean_length": 85.94791984558105, "completions/min_length": 49.0, "epoch": 0.7892777364110201, "grad_norm": 0.005615253515039924, "kl": 0.31787109375, "learning_rate": 9.881095827658547e-07, "loss": 0.00031751804635860026, "memory(GiB)": 109.69, "reward": 1.416666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.416666679084301, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 530, "train_speed(iter/s)": 0.029116 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.25, "completions/mean_length": 83.32291984558105, "completions/min_length": 60.5, "epoch": 0.7907669396872673, "grad_norm": 0.00707618258318517, "kl": 0.36328125, "learning_rate": 9.880582594919364e-07, "loss": 0.0003636353649199009, "memory(GiB)": 109.69, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 531, "train_speed(iter/s)": 0.029136 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.0, "completions/mean_length": 85.18750381469727, "completions/min_length": 57.75, "epoch": 0.7922561429635145, "grad_norm": 0.08288943534355203, "kl": 0.4853515625, "learning_rate": 9.880068270303322e-07, "loss": 0.0004865530354436487, "memory(GiB)": 109.69, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 532, "train_speed(iter/s)": 0.029132 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.25, "completions/mean_length": 92.18750190734863, "completions/min_length": 58.75, "epoch": 0.7937453462397617, "grad_norm": 0.005635926213476856, "kl": 0.33251953125, "learning_rate": 9.879552853925485e-07, "loss": 0.00033237793832086027, "memory(GiB)": 109.69, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 533, "train_speed(iter/s)": 0.029129 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.5, "completions/mean_length": 86.97916984558105, "completions/min_length": 59.0, "epoch": 0.7952345495160089, "grad_norm": 1.185631945834411, "kl": 0.3330078125, "learning_rate": 9.879036345901162e-07, "loss": -0.008226471021771431, "memory(GiB)": 109.69, "reward": 1.6979167461395264, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6979166865348816, "rewards/CineAccuracyORM/std": 0.464870348572731, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 534, "train_speed(iter/s)": 0.029133 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.25, "completions/mean_length": 88.42708396911621, "completions/min_length": 59.75, "epoch": 0.7967237527922562, "grad_norm": 1.1998160771198365, "kl": 0.3359375, "learning_rate": 9.878518746345905e-07, "loss": -0.009245321154594421, "memory(GiB)": 109.69, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 535, "train_speed(iter/s)": 0.029151 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.75, "completions/mean_length": 92.05208587646484, "completions/min_length": 58.5, "epoch": 0.7982129560685034, "grad_norm": 1.2129599818344385, "kl": 0.33251953125, "learning_rate": 9.87800005537551e-07, "loss": 0.033597998321056366, "memory(GiB)": 109.69, "reward": 1.6562500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500149011612, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 536, "train_speed(iter/s)": 0.029122 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.25, "completions/mean_length": 86.90625381469727, "completions/min_length": 59.75, "epoch": 0.7997021593447505, "grad_norm": 0.00635604825659406, "kl": 0.33447265625, "learning_rate": 9.87748027310602e-07, "loss": 0.0003343526041135192, "memory(GiB)": 109.69, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 537, "train_speed(iter/s)": 0.029103 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.5, "completions/mean_length": 90.16666793823242, "completions/min_length": 56.25, "epoch": 0.8011913626209978, "grad_norm": 0.00683814140802415, "kl": 0.3173828125, "learning_rate": 9.876959399653718e-07, "loss": 0.00031799249700270593, "memory(GiB)": 112.52, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 538, "train_speed(iter/s)": 0.029077 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.5, "completions/mean_length": 88.75000190734863, "completions/min_length": 64.5, "epoch": 0.802680565897245, "grad_norm": 1.6257680714393032, "kl": 0.34423828125, "learning_rate": 9.876437435135131e-07, "loss": 0.004171015694737434, "memory(GiB)": 112.52, "reward": 1.6979166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 539, "train_speed(iter/s)": 0.029095 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.0, "completions/mean_length": 86.09375190734863, "completions/min_length": 58.75, "epoch": 0.8041697691734921, "grad_norm": 2.1191947824714417, "kl": 0.31982421875, "learning_rate": 9.875914379667037e-07, "loss": -0.0004210217739455402, "memory(GiB)": 112.52, "reward": 1.572916716337204, "reward_std": 0.0876726396381855, "rewards/CineAccuracyORM/mean": 0.572916679084301, "rewards/CineAccuracyORM/std": 0.4957045316696167, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 540, "train_speed(iter/s)": 0.029101 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.25, "completions/mean_length": 90.76041984558105, "completions/min_length": 57.5, "epoch": 0.8056589724497394, "grad_norm": 1.1336562556705694, "kl": 0.30908203125, "learning_rate": 9.87539023336645e-07, "loss": 0.005263688042759895, "memory(GiB)": 112.52, "reward": 1.8958333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 541, "train_speed(iter/s)": 0.029105 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.75, "completions/mean_length": 80.81250381469727, "completions/min_length": 54.0, "epoch": 0.8071481757259866, "grad_norm": 1.853236641772589, "kl": 0.34814453125, "learning_rate": 9.87486499635063e-07, "loss": -0.001026800600811839, "memory(GiB)": 112.52, "reward": 1.4375000596046448, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.4375000149011612, "rewards/CineAccuracyORM/std": 0.4870600998401642, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 542, "train_speed(iter/s)": 0.029111 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.75, "completions/mean_length": 83.68750381469727, "completions/min_length": 57.0, "epoch": 0.8086373790022338, "grad_norm": 2.674997809434811, "kl": 0.33203125, "learning_rate": 9.874338668737089e-07, "loss": 0.01639718934893608, "memory(GiB)": 112.52, "reward": 1.6145833730697632, "reward_std": 0.14063050225377083, "rewards/CineAccuracyORM/mean": 0.6145833386108279, "rewards/CineAccuracyORM/std": 0.3297053314745426, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 543, "train_speed(iter/s)": 0.029132 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.25, "completions/mean_length": 88.90625381469727, "completions/min_length": 58.0, "epoch": 0.810126582278481, "grad_norm": 0.0055662739991508055, "kl": 0.34033203125, "learning_rate": 9.873811250643568e-07, "loss": 0.0003399087581783533, "memory(GiB)": 112.52, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 544, "train_speed(iter/s)": 0.02915 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.75, "completions/mean_length": 88.15625381469727, "completions/min_length": 58.0, "epoch": 0.8116157855547282, "grad_norm": 2.955494703520515, "kl": 0.33984375, "learning_rate": 9.873282742188065e-07, "loss": -0.00091451162006706, "memory(GiB)": 112.52, "reward": 1.4583333730697632, "reward_std": 0.16340987384319305, "rewards/CineAccuracyORM/mean": 0.4583333432674408, "rewards/CineAccuracyORM/std": 0.4790314584970474, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 545, "train_speed(iter/s)": 0.029155 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.0, "completions/mean_length": 83.92708587646484, "completions/min_length": 57.25, "epoch": 0.8131049888309755, "grad_norm": 1.3521197185877394, "kl": 0.32080078125, "learning_rate": 9.872753143488815e-07, "loss": 0.004484620410948992, "memory(GiB)": 112.52, "reward": 1.7500000596046448, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.7500000298023224, "rewards/CineAccuracyORM/std": 0.40787915512919426, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 546, "train_speed(iter/s)": 0.02916 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.5, "completions/mean_length": 80.55208587646484, "completions/min_length": 48.75, "epoch": 0.8145941921072226, "grad_norm": 1.2291762829198618, "kl": 0.37451171875, "learning_rate": 9.8722224546643e-07, "loss": 0.0046890778467059135, "memory(GiB)": 112.52, "reward": 1.5729166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.572916679084301, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 547, "train_speed(iter/s)": 0.029165 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.75, "completions/mean_length": 83.98958587646484, "completions/min_length": 57.25, "epoch": 0.8160833953834699, "grad_norm": 1.1875042394940964, "kl": 0.375, "learning_rate": 9.871690675833247e-07, "loss": 0.0008535304805263877, "memory(GiB)": 112.52, "reward": 1.6250000298023224, "reward_std": 0.0725918859243393, "rewards/CineAccuracyORM/mean": 0.6250000037252903, "rewards/CineAccuracyORM/std": 0.2155592292547226, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 548, "train_speed(iter/s)": 0.029154 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 114.75, "completions/mean_length": 82.18750190734863, "completions/min_length": 56.25, "epoch": 0.8175725986597171, "grad_norm": 0.09948211583748899, "kl": 0.40234375, "learning_rate": 9.87115780711462e-07, "loss": 0.00040179718052968383, "memory(GiB)": 112.52, "reward": 1.5000000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 549, "train_speed(iter/s)": 0.029154 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.25, "completions/mean_length": 81.89583587646484, "completions/min_length": 55.0, "epoch": 0.8190618019359642, "grad_norm": 2.305146945308774, "kl": 0.34765625, "learning_rate": 9.870623848627636e-07, "loss": 0.013700645416975021, "memory(GiB)": 112.52, "reward": 1.5520833730697632, "reward_std": 0.1293872930109501, "rewards/CineAccuracyORM/mean": 0.5520833432674408, "rewards/CineAccuracyORM/std": 0.45933252573013306, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 550, "train_speed(iter/s)": 0.029159 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.5, "completions/mean_length": 85.79166984558105, "completions/min_length": 52.75, "epoch": 0.8205510052122115, "grad_norm": 1.0131093387736363, "kl": 0.34228515625, "learning_rate": 9.87008880049175e-07, "loss": 0.01141413114964962, "memory(GiB)": 112.52, "reward": 1.6562500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500149011612, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 551, "train_speed(iter/s)": 0.029151 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.0, "completions/mean_length": 82.07291984558105, "completions/min_length": 56.0, "epoch": 0.8220402084884587, "grad_norm": 0.006230304861838944, "kl": 0.38427734375, "learning_rate": 9.869552662826659e-07, "loss": 0.0003838286502286792, "memory(GiB)": 112.52, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 552, "train_speed(iter/s)": 0.029139 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.0, "completions/mean_length": 81.38541793823242, "completions/min_length": 52.0, "epoch": 0.8235294117647058, "grad_norm": 0.8373283392152748, "kl": 0.36767578125, "learning_rate": 9.869015435752313e-07, "loss": 0.007829689420759678, "memory(GiB)": 112.52, "reward": 1.3437500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.3437500149011612, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 553, "train_speed(iter/s)": 0.029159 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 114.0, "completions/mean_length": 77.14583587646484, "completions/min_length": 57.75, "epoch": 0.8250186150409531, "grad_norm": 0.6513814924674627, "kl": 0.38330078125, "learning_rate": 9.868477119388894e-07, "loss": 0.0007384542259387672, "memory(GiB)": 112.52, "reward": 1.5937500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500149011612, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 554, "train_speed(iter/s)": 0.029169 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 112.5, "completions/mean_length": 75.80208587646484, "completions/min_length": 54.25, "epoch": 0.8265078183172003, "grad_norm": 1.316517550789001, "kl": 0.38623046875, "learning_rate": 9.867937713856837e-07, "loss": 0.0012163182254880667, "memory(GiB)": 112.52, "reward": 1.4270834028720856, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.4270833460614085, "rewards/CineAccuracyORM/std": 0.4121886007487774, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 555, "train_speed(iter/s)": 0.029149 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.0, "completions/mean_length": 78.81250190734863, "completions/min_length": 52.5, "epoch": 0.8279970215934475, "grad_norm": 1.2193495034251978, "kl": 0.3828125, "learning_rate": 9.867397219276817e-07, "loss": -0.006865666713565588, "memory(GiB)": 112.52, "reward": 1.3750000298023224, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.3750000074505806, "rewards/CineAccuracyORM/std": 0.3712640330195427, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 556, "train_speed(iter/s)": 0.029127 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.5, "completions/mean_length": 77.65625190734863, "completions/min_length": 48.0, "epoch": 0.8294862248696947, "grad_norm": 2.015009746572138, "kl": 0.39990234375, "learning_rate": 9.866855635769752e-07, "loss": -0.007559127174317837, "memory(GiB)": 112.52, "reward": 1.6250000596046448, "reward_std": 0.09750141017138958, "rewards/CineAccuracyORM/mean": 0.6250000223517418, "rewards/CineAccuracyORM/std": 0.48409245908260345, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 557, "train_speed(iter/s)": 0.029131 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 106.0, "completions/mean_length": 77.93750190734863, "completions/min_length": 54.75, "epoch": 0.8309754281459419, "grad_norm": 1.1396212497953206, "kl": 0.4013671875, "learning_rate": 9.866312963456799e-07, "loss": -0.004603826440870762, "memory(GiB)": 112.52, "reward": 1.6770833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833507180214, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 558, "train_speed(iter/s)": 0.02911 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.25, "completions/mean_length": 76.09375190734863, "completions/min_length": 54.5, "epoch": 0.8324646314221892, "grad_norm": 2.0369318681808783, "kl": 0.39599609375, "learning_rate": 9.865769202459372e-07, "loss": -0.0005838819779455662, "memory(GiB)": 112.52, "reward": 1.7187500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500149011612, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 559, "train_speed(iter/s)": 0.029083 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 112.25, "completions/mean_length": 74.71875381469727, "completions/min_length": 51.5, "epoch": 0.8339538346984363, "grad_norm": 0.006463066007836863, "kl": 0.380859375, "learning_rate": 9.865224352899118e-07, "loss": 0.00038079856312833726, "memory(GiB)": 112.52, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 560, "train_speed(iter/s)": 0.029103 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.5, "completions/mean_length": 77.13541793823242, "completions/min_length": 52.0, "epoch": 0.8354430379746836, "grad_norm": 0.0070602664320594, "kl": 0.37841796875, "learning_rate": 9.86467841489793e-07, "loss": 0.0003779749386012554, "memory(GiB)": 112.52, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 561, "train_speed(iter/s)": 0.029123 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 112.0, "completions/mean_length": 76.45833587646484, "completions/min_length": 52.75, "epoch": 0.8369322412509308, "grad_norm": 1.3044891393793678, "kl": 0.34326171875, "learning_rate": 9.86413138857794e-07, "loss": 0.00011404618271626532, "memory(GiB)": 112.52, "reward": 1.6458333730697632, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.6458333507180214, "rewards/CineAccuracyORM/std": 0.3637066036462784, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 562, "train_speed(iter/s)": 0.029128 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.25, "completions/mean_length": 78.05208587646484, "completions/min_length": 51.75, "epoch": 0.8384214445271779, "grad_norm": 2.520698481568386, "kl": 0.3701171875, "learning_rate": 9.863583274061533e-07, "loss": -0.009745754301548004, "memory(GiB)": 112.52, "reward": 1.6250000298023224, "reward_std": 0.1308017335832119, "rewards/CineAccuracyORM/mean": 0.6250000074505806, "rewards/CineAccuracyORM/std": 0.3712640330195427, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 563, "train_speed(iter/s)": 0.02912 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.0, "completions/mean_length": 76.61458587646484, "completions/min_length": 53.5, "epoch": 0.8399106478034252, "grad_norm": 0.006982204390713788, "kl": 0.36181640625, "learning_rate": 9.863034071471332e-07, "loss": 0.00036161611205898225, "memory(GiB)": 112.52, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 564, "train_speed(iter/s)": 0.029126 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 116.25, "completions/mean_length": 74.23958396911621, "completions/min_length": 47.75, "epoch": 0.8413998510796724, "grad_norm": 2.357364911725053, "kl": 0.40771484375, "learning_rate": 9.862483780930202e-07, "loss": -0.003732041921466589, "memory(GiB)": 112.52, "reward": 1.5625000298023224, "reward_std": 0.11116771958768368, "rewards/CineAccuracyORM/mean": 0.5625000074505806, "rewards/CineAccuracyORM/std": 0.48803938925266266, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 565, "train_speed(iter/s)": 0.029112 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.5, "completions/mean_length": 78.23958587646484, "completions/min_length": 47.25, "epoch": 0.8428890543559195, "grad_norm": 1.6290088761304622, "kl": 0.37451171875, "learning_rate": 9.861932402561252e-07, "loss": -0.0006133377319201827, "memory(GiB)": 112.52, "reward": 1.541666716337204, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.541666679084301, "rewards/CineAccuracyORM/std": 0.4888460487127304, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 566, "train_speed(iter/s)": 0.029118 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 116.75, "completions/mean_length": 76.07291984558105, "completions/min_length": 45.5, "epoch": 0.8443782576321668, "grad_norm": 2.313698737236339, "kl": 0.39697265625, "learning_rate": 9.861379936487839e-07, "loss": -0.015883658081293106, "memory(GiB)": 112.52, "reward": 1.4687500596046448, "reward_std": 0.12028077617287636, "rewards/CineAccuracyORM/mean": 0.4687500149011612, "rewards/CineAccuracyORM/std": 0.4841141924262047, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 567, "train_speed(iter/s)": 0.029088 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.5, "completions/mean_length": 76.03125190734863, "completions/min_length": 50.5, "epoch": 0.845867460908414, "grad_norm": 0.005954330858293882, "kl": 0.39404296875, "learning_rate": 9.86082638283356e-07, "loss": 0.0003934016858693212, "memory(GiB)": 112.52, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 568, "train_speed(iter/s)": 0.029092 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.25, "completions/mean_length": 76.30208587646484, "completions/min_length": 49.5, "epoch": 0.8473566641846612, "grad_norm": 0.0064107804163603364, "kl": 0.38623046875, "learning_rate": 9.86027174172225e-07, "loss": 0.0003855731920339167, "memory(GiB)": 112.52, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 569, "train_speed(iter/s)": 0.029099 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.0, "completions/mean_length": 75.84375190734863, "completions/min_length": 50.0, "epoch": 0.8488458674609084, "grad_norm": 2.075241098456564, "kl": 0.39501953125, "learning_rate": 9.859716013277998e-07, "loss": -0.0008470177417621017, "memory(GiB)": 112.52, "reward": 1.760416716337204, "reward_std": 0.102054663002491, "rewards/CineAccuracyORM/mean": 0.7604166939854622, "rewards/CineAccuracyORM/std": 0.3496922627091408, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 570, "train_speed(iter/s)": 0.029076 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.75, "completions/mean_length": 78.79166793823242, "completions/min_length": 51.5, "epoch": 0.8503350707371556, "grad_norm": 0.6111088621826294, "kl": 0.3740234375, "learning_rate": 9.859159197625128e-07, "loss": 0.01202618982642889, "memory(GiB)": 112.52, "reward": 1.4270833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.4270833432674408, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 571, "train_speed(iter/s)": 0.029083 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.5, "completions/mean_length": 75.40625190734863, "completions/min_length": 47.5, "epoch": 0.8518242740134029, "grad_norm": 1.1712652577584988, "kl": 0.38720703125, "learning_rate": 9.85860129488821e-07, "loss": -0.007169045507907867, "memory(GiB)": 112.52, "reward": 1.8541666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 572, "train_speed(iter/s)": 0.029093 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 108.75, "completions/mean_length": 75.41666984558105, "completions/min_length": 52.0, "epoch": 0.85331347728965, "grad_norm": 0.00536975069623488, "kl": 0.37548828125, "learning_rate": 9.85804230519206e-07, "loss": 0.00037528638495132327, "memory(GiB)": 112.52, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 573, "train_speed(iter/s)": 0.0291 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 105.5, "completions/mean_length": 77.29166984558105, "completions/min_length": 47.75, "epoch": 0.8548026805658973, "grad_norm": 1.4720244825922615, "kl": 0.359375, "learning_rate": 9.857482228661733e-07, "loss": -0.0013411666732281446, "memory(GiB)": 112.52, "reward": 1.6562500596046448, "reward_std": 0.0876726396381855, "rewards/CineAccuracyORM/mean": 0.6562500111758709, "rewards/CineAccuracyORM/std": 0.3192720115184784, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 574, "train_speed(iter/s)": 0.02912 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.0, "completions/mean_length": 78.79166793823242, "completions/min_length": 49.0, "epoch": 0.8562918838421445, "grad_norm": 0.7419209899278919, "kl": 0.40771484375, "learning_rate": 9.856921065422526e-07, "loss": 0.00831070076674223, "memory(GiB)": 112.52, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 575, "train_speed(iter/s)": 0.029104 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.5, "completions/mean_length": 75.27083396911621, "completions/min_length": 49.25, "epoch": 0.8577810871183916, "grad_norm": 0.006092779588224755, "kl": 0.36767578125, "learning_rate": 9.856358815599984e-07, "loss": 0.0003672169696073979, "memory(GiB)": 112.52, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 576, "train_speed(iter/s)": 0.029111 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.0, "completions/mean_length": 75.90625190734863, "completions/min_length": 51.5, "epoch": 0.8592702903946389, "grad_norm": 0.007633778159386049, "kl": 0.37548828125, "learning_rate": 9.855795479319893e-07, "loss": 0.0003754511708393693, "memory(GiB)": 112.52, "reward": 1.2500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.2500000074505806, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 577, "train_speed(iter/s)": 0.029121 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.25, "completions/mean_length": 73.18750381469727, "completions/min_length": 50.25, "epoch": 0.8607594936708861, "grad_norm": 0.9227706833976814, "kl": 0.40234375, "learning_rate": 9.85523105670828e-07, "loss": 0.004458785522729158, "memory(GiB)": 112.52, "reward": 1.6562500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500149011612, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 578, "train_speed(iter/s)": 0.029141 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.5, "completions/mean_length": 73.89583587646484, "completions/min_length": 45.0, "epoch": 0.8622486969471332, "grad_norm": 2.006039305119382, "kl": 0.369140625, "learning_rate": 9.854665547891418e-07, "loss": -0.01520291343331337, "memory(GiB)": 112.52, "reward": 1.6875000596046448, "reward_std": 0.08311938121914864, "rewards/CineAccuracyORM/mean": 0.6875000298023224, "rewards/CineAccuracyORM/std": 0.46184761822223663, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 579, "train_speed(iter/s)": 0.029157 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 106.25, "completions/mean_length": 69.76041793823242, "completions/min_length": 41.5, "epoch": 0.8637379002233805, "grad_norm": 1.652370619454665, "kl": 0.41357421875, "learning_rate": 9.854098952995821e-07, "loss": -0.00648915208876133, "memory(GiB)": 112.52, "reward": 1.7083333730697632, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7083333507180214, "rewards/CineAccuracyORM/std": 0.33594508469104767, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 580, "train_speed(iter/s)": 0.029163 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.75, "completions/mean_length": 71.77083396911621, "completions/min_length": 46.5, "epoch": 0.8652271034996277, "grad_norm": 2.3679126806388204, "kl": 0.39404296875, "learning_rate": 9.853531272148247e-07, "loss": 0.006342240609228611, "memory(GiB)": 112.52, "reward": 1.7708333730697632, "reward_std": 0.12483403086662292, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 581, "train_speed(iter/s)": 0.029168 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.0, "completions/mean_length": 71.25000190734863, "completions/min_length": 48.25, "epoch": 0.8667163067758749, "grad_norm": 3.2723314652639526, "kl": 0.40966796875, "learning_rate": 9.8529625054757e-07, "loss": 0.005929957143962383, "memory(GiB)": 112.52, "reward": 1.6562500298023224, "reward_std": 0.0876726396381855, "rewards/CineAccuracyORM/mean": 0.6562500149011612, "rewards/CineAccuracyORM/std": 0.34280356764793396, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 582, "train_speed(iter/s)": 0.029162 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.25, "completions/mean_length": 74.68750190734863, "completions/min_length": 50.0, "epoch": 0.8682055100521221, "grad_norm": 0.8875177025324662, "kl": 0.40966796875, "learning_rate": 9.852392653105417e-07, "loss": 0.0016564615070819855, "memory(GiB)": 112.52, "reward": 1.7291667461395264, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7291666865348816, "rewards/CineAccuracyORM/std": 0.43174003064632416, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 583, "train_speed(iter/s)": 0.029155 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 104.0, "completions/mean_length": 72.03125381469727, "completions/min_length": 47.5, "epoch": 0.8696947133283693, "grad_norm": 1.008672026566808, "kl": 0.41357421875, "learning_rate": 9.85182171516489e-07, "loss": -0.007280740886926651, "memory(GiB)": 112.52, "reward": 1.5312500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.5312500149011612, "rewards/CineAccuracyORM/std": 0.4884019047021866, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 584, "train_speed(iter/s)": 0.029153 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.0, "completions/mean_length": 75.82291984558105, "completions/min_length": 47.25, "epoch": 0.8711839166046166, "grad_norm": 0.0062746344636582015, "kl": 0.42578125, "learning_rate": 9.851249691781848e-07, "loss": 0.0004256304237060249, "memory(GiB)": 112.52, "reward": 1.3333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.3333333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 585, "train_speed(iter/s)": 0.029119 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 101.0, "completions/mean_length": 72.86458587646484, "completions/min_length": 51.0, "epoch": 0.8726731198808637, "grad_norm": 1.3877567008972358, "kl": 0.4189453125, "learning_rate": 9.850676583084262e-07, "loss": -0.004011595621705055, "memory(GiB)": 112.52, "reward": 1.8125000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 586, "train_speed(iter/s)": 0.029124 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.5, "completions/mean_length": 74.25000190734863, "completions/min_length": 50.75, "epoch": 0.874162323157111, "grad_norm": 1.2923874145694663, "kl": 0.4150390625, "learning_rate": 9.850102389200344e-07, "loss": -0.00044212344801053405, "memory(GiB)": 112.52, "reward": 1.604166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.604166679084301, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 587, "train_speed(iter/s)": 0.02913 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.75, "completions/mean_length": 73.88541793823242, "completions/min_length": 48.75, "epoch": 0.8756515264333582, "grad_norm": 2.223624513325125, "kl": 0.4072265625, "learning_rate": 9.849527110258557e-07, "loss": 3.3362459362251684e-05, "memory(GiB)": 112.52, "reward": 1.5000000298023224, "reward_std": 0.08625819534063339, "rewards/CineAccuracyORM/mean": 0.5000000074505806, "rewards/CineAccuracyORM/std": 0.40968769788742065, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 588, "train_speed(iter/s)": 0.02915 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.0, "completions/mean_length": 72.51041793823242, "completions/min_length": 51.25, "epoch": 0.8771407297096053, "grad_norm": 1.1722657845444695, "kl": 0.38525390625, "learning_rate": 9.848950746387598e-07, "loss": -0.0003558211028575897, "memory(GiB)": 112.52, "reward": 1.7500000298023224, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 589, "train_speed(iter/s)": 0.029157 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 96.0, "completions/mean_length": 72.28125190734863, "completions/min_length": 47.5, "epoch": 0.8786299329858526, "grad_norm": 2.5739444742359905, "kl": 0.3974609375, "learning_rate": 9.848373297716414e-07, "loss": -0.008111739531159401, "memory(GiB)": 112.52, "reward": 1.6250000596046448, "reward_std": 0.07715167850255966, "rewards/CineAccuracyORM/mean": 0.6250000149011612, "rewards/CineAccuracyORM/std": 0.4772557094693184, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 590, "train_speed(iter/s)": 0.029136 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.75, "completions/mean_length": 72.05208587646484, "completions/min_length": 47.25, "epoch": 0.8801191362620998, "grad_norm": 0.02040684058179204, "kl": 0.40771484375, "learning_rate": 9.847794764374186e-07, "loss": 0.0004077297053299844, "memory(GiB)": 112.52, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 591, "train_speed(iter/s)": 0.029137 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 105.25, "completions/mean_length": 71.45833778381348, "completions/min_length": 45.5, "epoch": 0.8816083395383469, "grad_norm": 0.010584749999328241, "kl": 0.42822265625, "learning_rate": 9.847215146490347e-07, "loss": 0.00042821787064895034, "memory(GiB)": 112.52, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 592, "train_speed(iter/s)": 0.029141 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 107.5, "completions/mean_length": 71.46875190734863, "completions/min_length": 50.5, "epoch": 0.8830975428145942, "grad_norm": 0.007341852827130645, "kl": 0.38330078125, "learning_rate": 9.846634444194567e-07, "loss": 0.00038344733184203506, "memory(GiB)": 112.52, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 593, "train_speed(iter/s)": 0.02916 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 113.25, "completions/mean_length": 70.72916984558105, "completions/min_length": 47.0, "epoch": 0.8845867460908414, "grad_norm": 0.00870429144499194, "kl": 0.43701171875, "learning_rate": 9.84605265761676e-07, "loss": 0.00043711112812161446, "memory(GiB)": 112.52, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 594, "train_speed(iter/s)": 0.029153 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.5, "completions/mean_length": 75.78125381469727, "completions/min_length": 50.5, "epoch": 0.8860759493670886, "grad_norm": 1.428103005847408, "kl": 0.39990234375, "learning_rate": 9.845469786887079e-07, "loss": 0.002914824988692999, "memory(GiB)": 112.52, "reward": 1.6250000596046448, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.6250000111758709, "rewards/CineAccuracyORM/std": 0.33594508469104767, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 595, "train_speed(iter/s)": 0.029158 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 98.25, "completions/mean_length": 69.79166889190674, "completions/min_length": 47.75, "epoch": 0.8875651526433358, "grad_norm": 1.0564084218280585, "kl": 0.41552734375, "learning_rate": 9.844885832135927e-07, "loss": 0.0029438920319080353, "memory(GiB)": 112.52, "reward": 1.5312500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.5312500074505806, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 596, "train_speed(iter/s)": 0.029178 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 106.75, "completions/mean_length": 72.97916793823242, "completions/min_length": 49.25, "epoch": 0.889054355919583, "grad_norm": 0.6469137896670478, "kl": 0.38134765625, "learning_rate": 9.844300793493945e-07, "loss": 0.006353406235575676, "memory(GiB)": 112.52, "reward": 1.6770833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 597, "train_speed(iter/s)": 0.029172 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 116.25, "completions/mean_length": 73.55208778381348, "completions/min_length": 49.5, "epoch": 0.8905435591958303, "grad_norm": 0.006378617922002938, "kl": 0.38427734375, "learning_rate": 9.843714671092018e-07, "loss": 0.0003840494900941849, "memory(GiB)": 112.52, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 598, "train_speed(iter/s)": 0.029191 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 106.5, "completions/mean_length": 71.19791793823242, "completions/min_length": 49.5, "epoch": 0.8920327624720774, "grad_norm": 2.235590489202261, "kl": 0.3837890625, "learning_rate": 9.843127465061269e-07, "loss": 0.004083208739757538, "memory(GiB)": 112.52, "reward": 1.5000000596046448, "reward_std": 0.11116771958768368, "rewards/CineAccuracyORM/mean": 0.5000000223517418, "rewards/CineAccuracyORM/std": 0.48960913717746735, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 599, "train_speed(iter/s)": 0.029191 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.75, "completions/mean_length": 74.05208396911621, "completions/min_length": 49.75, "epoch": 0.8935219657483247, "grad_norm": 1.9063304012595204, "kl": 0.39111328125, "learning_rate": 9.84253917553307e-07, "loss": 0.0015885974280536175, "memory(GiB)": 112.52, "reward": 1.6875000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000149011612, "rewards/CineAccuracyORM/std": 0.4717390313744545, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 600, "train_speed(iter/s)": 0.029201 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.75, "completions/mean_length": 71.22916984558105, "completions/min_length": 47.0, "epoch": 0.8950111690245719, "grad_norm": 2.3740969415011133, "kl": 0.39990234375, "learning_rate": 9.84194980263903e-07, "loss": -2.6005632207670715e-06, "memory(GiB)": 112.52, "reward": 1.5833333730697632, "reward_std": 0.07715167850255966, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4772557094693184, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 601, "train_speed(iter/s)": 0.029202 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 108.0, "completions/mean_length": 70.35416984558105, "completions/min_length": 48.25, "epoch": 0.896500372300819, "grad_norm": 0.005494544343667675, "kl": 0.392578125, "learning_rate": 9.841359346511002e-07, "loss": 0.0003923073527403176, "memory(GiB)": 112.52, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 602, "train_speed(iter/s)": 0.02921 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 113.5, "completions/mean_length": 71.89583587646484, "completions/min_length": 45.25, "epoch": 0.8979895755770663, "grad_norm": 0.0054802518768054306, "kl": 0.400390625, "learning_rate": 9.840767807281085e-07, "loss": 0.0003999320324510336, "memory(GiB)": 112.52, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 603, "train_speed(iter/s)": 0.029217 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.0, "completions/mean_length": 72.77083587646484, "completions/min_length": 46.5, "epoch": 0.8994787788533135, "grad_norm": 0.00520410419539524, "kl": 0.4033203125, "learning_rate": 9.840175185081618e-07, "loss": 0.0004034956218674779, "memory(GiB)": 112.52, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 604, "train_speed(iter/s)": 0.029195 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 99.5, "completions/mean_length": 69.63541793823242, "completions/min_length": 45.0, "epoch": 0.9009679821295606, "grad_norm": 1.1738058088597916, "kl": 0.3779296875, "learning_rate": 9.83958148004518e-07, "loss": 0.004811132326722145, "memory(GiB)": 112.52, "reward": 1.572916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.572916679084301, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 605, "train_speed(iter/s)": 0.029201 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 106.25, "completions/mean_length": 69.45833587646484, "completions/min_length": 41.25, "epoch": 0.9024571854058079, "grad_norm": 0.005218041258748448, "kl": 0.419921875, "learning_rate": 9.83898669230459e-07, "loss": 0.00042012729682028294, "memory(GiB)": 112.52, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 606, "train_speed(iter/s)": 0.029191 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 97.25, "completions/mean_length": 69.19791793823242, "completions/min_length": 47.25, "epoch": 0.9039463886820551, "grad_norm": 1.5585105631295835, "kl": 0.3984375, "learning_rate": 9.838390821992917e-07, "loss": 0.0056547317653894424, "memory(GiB)": 112.52, "reward": 1.8125, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.2080918326973915, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 607, "train_speed(iter/s)": 0.029198 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 104.25, "completions/mean_length": 68.34375381469727, "completions/min_length": 45.0, "epoch": 0.9054355919583023, "grad_norm": 0.005063166469637201, "kl": 0.43505859375, "learning_rate": 9.837793869243467e-07, "loss": 0.0004343453038018197, "memory(GiB)": 112.52, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 608, "train_speed(iter/s)": 0.029174 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.25, "completions/mean_length": 70.85416793823242, "completions/min_length": 40.25, "epoch": 0.9069247952345495, "grad_norm": 1.448491023615666, "kl": 0.3828125, "learning_rate": 9.837195834189791e-07, "loss": -0.01579442247748375, "memory(GiB)": 112.52, "reward": 1.6875000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000149011612, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 609, "train_speed(iter/s)": 0.029179 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 102.5, "completions/mean_length": 69.04166889190674, "completions/min_length": 42.5, "epoch": 0.9084139985107967, "grad_norm": 1.3265794085586156, "kl": 0.408203125, "learning_rate": 9.83659671696568e-07, "loss": -0.003318440169095993, "memory(GiB)": 112.52, "reward": 1.5312500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.5312500149011612, "rewards/CineAccuracyORM/std": 0.4884019047021866, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 610, "train_speed(iter/s)": 0.029171 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 98.25, "completions/mean_length": 69.55208587646484, "completions/min_length": 45.75, "epoch": 0.909903201787044, "grad_norm": 0.005620886054935561, "kl": 0.40283203125, "learning_rate": 9.835996517705168e-07, "loss": 0.0004020759370177984, "memory(GiB)": 112.52, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 611, "train_speed(iter/s)": 0.029153 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 104.75, "completions/mean_length": 70.28125190734863, "completions/min_length": 43.25, "epoch": 0.9113924050632911, "grad_norm": 0.9237920570237771, "kl": 0.39990234375, "learning_rate": 9.83539523654253e-07, "loss": -0.004159916192293167, "memory(GiB)": 112.52, "reward": 1.7604166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.760416679084301, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 612, "train_speed(iter/s)": 0.029172 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 105.0, "completions/mean_length": 72.20833587646484, "completions/min_length": 50.0, "epoch": 0.9128816083395384, "grad_norm": 0.00519606824799348, "kl": 0.40185546875, "learning_rate": 9.834792873612283e-07, "loss": 0.0004021385102532804, "memory(GiB)": 112.52, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 613, "train_speed(iter/s)": 0.029162 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 112.0, "completions/mean_length": 72.17708587646484, "completions/min_length": 44.75, "epoch": 0.9143708116157856, "grad_norm": 0.004972373174626522, "kl": 0.39208984375, "learning_rate": 9.834189429049186e-07, "loss": 0.0003919163136743009, "memory(GiB)": 112.52, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 614, "train_speed(iter/s)": 0.029168 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.0, "completions/mean_length": 69.03125381469727, "completions/min_length": 48.25, "epoch": 0.9158600148920327, "grad_norm": 1.8697445178177639, "kl": 0.400390625, "learning_rate": 9.833584902988245e-07, "loss": -0.006663159932941198, "memory(GiB)": 112.52, "reward": 1.4895833730697632, "reward_std": 0.08838834799826145, "rewards/CineAccuracyORM/mean": 0.4895833432674408, "rewards/CineAccuracyORM/std": 0.31460215896368027, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 615, "train_speed(iter/s)": 0.029187 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 98.0, "completions/mean_length": 68.55208587646484, "completions/min_length": 46.0, "epoch": 0.91734921816828, "grad_norm": 0.005458263354674201, "kl": 0.419921875, "learning_rate": 9.8329792955647e-07, "loss": 0.00041969199082814157, "memory(GiB)": 112.52, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 616, "train_speed(iter/s)": 0.029162 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 102.75, "completions/mean_length": 70.47916984558105, "completions/min_length": 52.0, "epoch": 0.9188384214445272, "grad_norm": 0.8550147713348262, "kl": 0.41455078125, "learning_rate": 9.832372606914036e-07, "loss": -0.0023310009855777025, "memory(GiB)": 112.52, "reward": 1.6145833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6145833432674408, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 617, "train_speed(iter/s)": 0.029169 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.0, "completions/mean_length": 68.87500190734863, "completions/min_length": 48.5, "epoch": 0.9203276247207743, "grad_norm": 1.61185471156378, "kl": 0.42041015625, "learning_rate": 9.831764837171984e-07, "loss": 0.0014587128534913063, "memory(GiB)": 112.52, "reward": 1.6770833730697632, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.6770833432674408, "rewards/CineAccuracyORM/std": 0.4749870151281357, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 618, "train_speed(iter/s)": 0.029188 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 98.5, "completions/mean_length": 68.32291793823242, "completions/min_length": 45.5, "epoch": 0.9218168279970216, "grad_norm": 1.0168286033793945, "kl": 0.3857421875, "learning_rate": 9.831155986474508e-07, "loss": -0.0020774039439857006, "memory(GiB)": 112.52, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 619, "train_speed(iter/s)": 0.029207 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 98.75, "completions/mean_length": 71.35416984558105, "completions/min_length": 52.75, "epoch": 0.9233060312732688, "grad_norm": 1.4626583801476445, "kl": 0.39990234375, "learning_rate": 9.830546054957827e-07, "loss": 0.0007984896656125784, "memory(GiB)": 112.52, "reward": 1.6250000298023224, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.6250000149011612, "rewards/CineAccuracyORM/std": 0.17827537283301353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 620, "train_speed(iter/s)": 0.029213 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 113.25, "completions/mean_length": 71.86458396911621, "completions/min_length": 44.75, "epoch": 0.924795234549516, "grad_norm": 1.6614316907779727, "kl": 0.39501953125, "learning_rate": 9.829935042758385e-07, "loss": -0.00858182180672884, "memory(GiB)": 112.52, "reward": 1.8854166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.20484384894371033, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 621, "train_speed(iter/s)": 0.029207 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 95.0, "completions/mean_length": 68.48958587646484, "completions/min_length": 43.75, "epoch": 0.9262844378257632, "grad_norm": 2.455408305601539, "kl": 0.4013671875, "learning_rate": 9.829322950012886e-07, "loss": 0.006376785226166248, "memory(GiB)": 112.52, "reward": 1.635416716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.635416679084301, "rewards/CineAccuracyORM/std": 0.4456155598163605, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 622, "train_speed(iter/s)": 0.029225 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.75, "completions/mean_length": 74.31250381469727, "completions/min_length": 47.5, "epoch": 0.9277736411020104, "grad_norm": 0.005106942924115156, "kl": 0.3984375, "learning_rate": 9.828709776858259e-07, "loss": 0.00039804051630198956, "memory(GiB)": 112.52, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 623, "train_speed(iter/s)": 0.02923 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 106.75, "completions/mean_length": 71.00000190734863, "completions/min_length": 51.75, "epoch": 0.9292628443782577, "grad_norm": 1.3436756234045708, "kl": 0.40869140625, "learning_rate": 9.828095523431685e-07, "loss": -0.0055846646428108215, "memory(GiB)": 112.52, "reward": 1.7812500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7812500149011612, "rewards/CineAccuracyORM/std": 0.34448449313640594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 624, "train_speed(iter/s)": 0.029222 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.25, "completions/mean_length": 70.64583587646484, "completions/min_length": 45.5, "epoch": 0.9307520476545048, "grad_norm": 1.3447416327022965, "kl": 0.40380859375, "learning_rate": 9.827480189870586e-07, "loss": 0.0011397681664675474, "memory(GiB)": 112.52, "reward": 1.8750000298023224, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.8750000149011612, "rewards/CineAccuracyORM/std": 0.2155592292547226, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 625, "train_speed(iter/s)": 0.029213 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.0, "completions/mean_length": 72.68750190734863, "completions/min_length": 50.75, "epoch": 0.932241250930752, "grad_norm": 1.4868655615299018, "kl": 0.4111328125, "learning_rate": 9.826863776312618e-07, "loss": -0.004275862127542496, "memory(GiB)": 112.52, "reward": 1.7812500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7812500074505806, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 626, "train_speed(iter/s)": 0.029231 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.25, "completions/mean_length": 70.88541793823242, "completions/min_length": 47.75, "epoch": 0.9337304542069993, "grad_norm": 1.7003607860903247, "kl": 0.3984375, "learning_rate": 9.826246282895691e-07, "loss": 0.00558354239910841, "memory(GiB)": 112.52, "reward": 1.6770833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 627, "train_speed(iter/s)": 0.029237 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 108.25, "completions/mean_length": 73.32291984558105, "completions/min_length": 45.25, "epoch": 0.9352196574832464, "grad_norm": 1.5256441431020593, "kl": 0.396484375, "learning_rate": 9.825627709757946e-07, "loss": 0.0005217051366344094, "memory(GiB)": 112.52, "reward": 1.8125000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000074505806, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 628, "train_speed(iter/s)": 0.029229 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 105.5, "completions/mean_length": 73.08333587646484, "completions/min_length": 48.5, "epoch": 0.9367088607594937, "grad_norm": 1.190968738742739, "kl": 0.37841796875, "learning_rate": 9.825008057037768e-07, "loss": 0.0032519339583814144, "memory(GiB)": 112.52, "reward": 1.6875000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000149011612, "rewards/CineAccuracyORM/std": 0.4717390313744545, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 629, "train_speed(iter/s)": 0.029236 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.5, "completions/mean_length": 70.02083778381348, "completions/min_length": 48.25, "epoch": 0.9381980640357409, "grad_norm": 3.2433563000782577, "kl": 0.40625, "learning_rate": 9.824387324873785e-07, "loss": 0.004756885580718517, "memory(GiB)": 112.52, "reward": 1.5208333730697632, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.5208333432674408, "rewards/CineAccuracyORM/std": 0.49164988845586777, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 630, "train_speed(iter/s)": 0.029254 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 97.25, "completions/mean_length": 70.02083778381348, "completions/min_length": 50.5, "epoch": 0.939687267311988, "grad_norm": 0.005208109404384754, "kl": 0.400390625, "learning_rate": 9.823765513404872e-07, "loss": 0.0004002791829407215, "memory(GiB)": 112.52, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 631, "train_speed(iter/s)": 0.029265 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.75, "completions/mean_length": 68.09375190734863, "completions/min_length": 42.5, "epoch": 0.9411764705882353, "grad_norm": 0.14170570989777123, "kl": 0.46142578125, "learning_rate": 9.823142622770134e-07, "loss": 0.00046214740723371506, "memory(GiB)": 112.52, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 632, "train_speed(iter/s)": 0.029266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 97.5, "completions/mean_length": 69.51041793823242, "completions/min_length": 44.75, "epoch": 0.9426656738644825, "grad_norm": 0.005080563266714895, "kl": 0.41162109375, "learning_rate": 9.822518653108923e-07, "loss": 0.0004114430339541286, "memory(GiB)": 112.52, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 633, "train_speed(iter/s)": 0.029259 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 105.5, "completions/mean_length": 69.32291984558105, "completions/min_length": 45.75, "epoch": 0.9441548771407298, "grad_norm": 0.005428917281493757, "kl": 0.3935546875, "learning_rate": 9.821893604560837e-07, "loss": 0.00039303707308135927, "memory(GiB)": 112.52, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 634, "train_speed(iter/s)": 0.029276 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.25, "completions/mean_length": 71.68750381469727, "completions/min_length": 47.75, "epoch": 0.9456440804169769, "grad_norm": 1.2748903567049192, "kl": 0.421875, "learning_rate": 9.821267477265705e-07, "loss": 0.007331032305955887, "memory(GiB)": 112.52, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833507180214, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 635, "train_speed(iter/s)": 0.029269 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 105.0, "completions/mean_length": 72.80208396911621, "completions/min_length": 46.0, "epoch": 0.9471332836932241, "grad_norm": 0.0048179920812600665, "kl": 0.419921875, "learning_rate": 9.820640271363608e-07, "loss": 0.0004188363382127136, "memory(GiB)": 112.52, "reward": 1.3333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.3333333432674408, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 636, "train_speed(iter/s)": 0.029263 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 98.0, "completions/mean_length": 69.76041793823242, "completions/min_length": 48.25, "epoch": 0.9486224869694714, "grad_norm": 0.005617817632545761, "kl": 0.40087890625, "learning_rate": 9.820011986994861e-07, "loss": 0.00040009021176956594, "memory(GiB)": 112.52, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 637, "train_speed(iter/s)": 0.029256 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 108.25, "completions/mean_length": 72.28125381469727, "completions/min_length": 42.25, "epoch": 0.9501116902457185, "grad_norm": 1.3735701659398485, "kl": 0.421875, "learning_rate": 9.819382624300026e-07, "loss": 0.004551785998046398, "memory(GiB)": 112.52, "reward": 1.635416716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.635416679084301, "rewards/CineAccuracyORM/std": 0.4456155598163605, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 638, "train_speed(iter/s)": 0.029248 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.0, "completions/mean_length": 72.63541984558105, "completions/min_length": 50.25, "epoch": 0.9516008935219658, "grad_norm": 0.005650860984954299, "kl": 0.40673828125, "learning_rate": 9.8187521834199e-07, "loss": 0.0004066992551088333, "memory(GiB)": 112.52, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 639, "train_speed(iter/s)": 0.029252 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 97.75, "completions/mean_length": 69.37500381469727, "completions/min_length": 48.25, "epoch": 0.953090096798213, "grad_norm": 1.9526685036886386, "kl": 0.423828125, "learning_rate": 9.818120664495526e-07, "loss": 0.008565513417124748, "memory(GiB)": 112.52, "reward": 1.7500000298023224, "reward_std": 0.11116771958768368, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3600961044430733, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 640, "train_speed(iter/s)": 0.029244 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 112.5, "completions/mean_length": 70.54166984558105, "completions/min_length": 46.5, "epoch": 0.9545793000744601, "grad_norm": 0.006094690359642097, "kl": 0.3701171875, "learning_rate": 9.817488067668186e-07, "loss": 0.00037015078123658895, "memory(GiB)": 112.52, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 641, "train_speed(iter/s)": 0.029261 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 97.5, "completions/mean_length": 68.68750190734863, "completions/min_length": 43.75, "epoch": 0.9560685033507074, "grad_norm": 0.0058232657640693375, "kl": 0.427734375, "learning_rate": 9.816854393079402e-07, "loss": 0.00042724923696368933, "memory(GiB)": 112.52, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 642, "train_speed(iter/s)": 0.029257 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 101.75, "completions/mean_length": 73.16666984558105, "completions/min_length": 46.5, "epoch": 0.9575577066269546, "grad_norm": 0.006178687383978349, "kl": 0.42578125, "learning_rate": 9.81621964087094e-07, "loss": 0.00042563313036225736, "memory(GiB)": 112.52, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 643, "train_speed(iter/s)": 0.029275 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.0, "completions/mean_length": 69.58333587646484, "completions/min_length": 48.25, "epoch": 0.9590469099032017, "grad_norm": 0.005589512560931955, "kl": 0.41015625, "learning_rate": 9.815583811184808e-07, "loss": 0.00041000748751685023, "memory(GiB)": 112.52, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 644, "train_speed(iter/s)": 0.029282 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 99.75, "completions/mean_length": 70.82291984558105, "completions/min_length": 51.0, "epoch": 0.960536113179449, "grad_norm": 0.005669334251012383, "kl": 0.42529296875, "learning_rate": 9.81494690416325e-07, "loss": 0.00042497669346630573, "memory(GiB)": 112.52, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 645, "train_speed(iter/s)": 0.029261 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 112.5, "completions/mean_length": 69.54166984558105, "completions/min_length": 43.5, "epoch": 0.9620253164556962, "grad_norm": 2.437451441091755, "kl": 0.4365234375, "learning_rate": 9.814308919948755e-07, "loss": -0.00022351711231749505, "memory(GiB)": 112.52, "reward": 1.635416716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.635416679084301, "rewards/CineAccuracyORM/std": 0.4456155598163605, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 646, "train_speed(iter/s)": 0.029253 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 97.0, "completions/mean_length": 66.36458587646484, "completions/min_length": 43.0, "epoch": 0.9635145197319435, "grad_norm": 4.213076560240519, "kl": 0.4287109375, "learning_rate": 9.813669858684053e-07, "loss": -0.0064424797892570496, "memory(GiB)": 112.52, "reward": 1.6666666865348816, "reward_std": 0.11116772517561913, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.34154878556728363, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 647, "train_speed(iter/s)": 0.02926 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 97.75, "completions/mean_length": 70.60416984558105, "completions/min_length": 48.0, "epoch": 0.9650037230081906, "grad_norm": 2.3915198613487485, "kl": 0.37060546875, "learning_rate": 9.813029720512112e-07, "loss": -0.006217451766133308, "memory(GiB)": 112.52, "reward": 1.7395833730697632, "reward_std": 0.0876726396381855, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.35178712010383606, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 648, "train_speed(iter/s)": 0.029267 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.75, "completions/mean_length": 67.44791984558105, "completions/min_length": 45.75, "epoch": 0.9664929262844378, "grad_norm": 1.8607465453230345, "kl": 0.41748046875, "learning_rate": 9.812388505576144e-07, "loss": 0.0041600847616791725, "memory(GiB)": 112.52, "reward": 1.7187500298023224, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.7187500074505806, "rewards/CineAccuracyORM/std": 0.25314687192440033, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 649, "train_speed(iter/s)": 0.029273 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 106.75, "completions/mean_length": 71.35416984558105, "completions/min_length": 49.25, "epoch": 0.9679821295606851, "grad_norm": 0.005838738237570774, "kl": 0.42333984375, "learning_rate": 9.811746214019599e-07, "loss": 0.00042335205944254994, "memory(GiB)": 112.52, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 650, "train_speed(iter/s)": 0.029291 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 112.25, "completions/mean_length": 72.33333587646484, "completions/min_length": 48.25, "epoch": 0.9694713328369322, "grad_norm": 0.004871262877852506, "kl": 0.39013671875, "learning_rate": 9.81110284598617e-07, "loss": 0.00038963055703788996, "memory(GiB)": 112.52, "reward": 1.416666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.416666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 651, "train_speed(iter/s)": 0.029308 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 105.75, "completions/mean_length": 71.10416984558105, "completions/min_length": 49.25, "epoch": 0.9709605361131795, "grad_norm": 0.005802153547507584, "kl": 0.4140625, "learning_rate": 9.810458401619792e-07, "loss": 0.000413952162489295, "memory(GiB)": 112.52, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 652, "train_speed(iter/s)": 0.029325 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 106.25, "completions/mean_length": 69.97916984558105, "completions/min_length": 45.5, "epoch": 0.9724497393894267, "grad_norm": 0.005227892887293894, "kl": 0.41064453125, "learning_rate": 9.809812881064639e-07, "loss": 0.00041095499182119966, "memory(GiB)": 112.52, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 653, "train_speed(iter/s)": 0.029328 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 106.75, "completions/mean_length": 70.09375190734863, "completions/min_length": 41.0, "epoch": 0.9739389426656738, "grad_norm": 1.0923725480307174, "kl": 0.40234375, "learning_rate": 9.809166284465125e-07, "loss": 0.0064291986636817455, "memory(GiB)": 112.52, "reward": 1.5937500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500149011612, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 654, "train_speed(iter/s)": 0.029322 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.75, "completions/mean_length": 67.14583587646484, "completions/min_length": 45.75, "epoch": 0.9754281459419211, "grad_norm": 0.006587862214507907, "kl": 0.40380859375, "learning_rate": 9.808518611965906e-07, "loss": 0.0004037757171317935, "memory(GiB)": 112.52, "reward": 1.416666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.416666679084301, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 655, "train_speed(iter/s)": 0.02934 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 99.75, "completions/mean_length": 67.88541984558105, "completions/min_length": 44.0, "epoch": 0.9769173492181683, "grad_norm": 0.005741206385405985, "kl": 0.41259765625, "learning_rate": 9.807869863711876e-07, "loss": 0.00041203334694728255, "memory(GiB)": 112.52, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 656, "train_speed(iter/s)": 0.029345 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.5, "completions/mean_length": 72.72916984558105, "completions/min_length": 47.75, "epoch": 0.9784065524944154, "grad_norm": 2.7617368330755556, "kl": 0.43115234375, "learning_rate": 9.807220039848178e-07, "loss": -0.00828004814684391, "memory(GiB)": 112.52, "reward": 1.4895833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.4895833507180214, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 657, "train_speed(iter/s)": 0.029336 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 107.75, "completions/mean_length": 69.26041984558105, "completions/min_length": 43.75, "epoch": 0.9798957557706627, "grad_norm": 1.2953205541295427, "kl": 0.40771484375, "learning_rate": 9.806569140520184e-07, "loss": -0.0030092690140008926, "memory(GiB)": 112.52, "reward": 1.5937500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500223517418, "rewards/CineAccuracyORM/std": 0.47723397612571716, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 658, "train_speed(iter/s)": 0.029341 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 105.75, "completions/mean_length": 72.30208778381348, "completions/min_length": 44.75, "epoch": 0.9813849590469099, "grad_norm": 2.4466404432224276, "kl": 0.392578125, "learning_rate": 9.805917165873515e-07, "loss": -0.004878911655396223, "memory(GiB)": 112.52, "reward": 1.291666716337204, "reward_std": 0.11116772517561913, "rewards/CineAccuracyORM/mean": 0.291666679084301, "rewards/CineAccuracyORM/std": 0.4605609029531479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 659, "train_speed(iter/s)": 0.029333 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 106.25, "completions/mean_length": 68.83333587646484, "completions/min_length": 40.25, "epoch": 0.9828741623231572, "grad_norm": 0.005386109791111296, "kl": 0.39990234375, "learning_rate": 9.805264116054026e-07, "loss": 0.0003997997264377773, "memory(GiB)": 112.52, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 660, "train_speed(iter/s)": 0.029321 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 107.75, "completions/mean_length": 69.33333396911621, "completions/min_length": 47.5, "epoch": 0.9843633655994043, "grad_norm": 2.004604997931502, "kl": 0.42724609375, "learning_rate": 9.80460999120782e-07, "loss": 0.008620142936706543, "memory(GiB)": 112.52, "reward": 1.5833333730697632, "reward_std": 0.08625819534063339, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.45247404277324677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 661, "train_speed(iter/s)": 0.029315 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.5, "completions/mean_length": 69.07291793823242, "completions/min_length": 43.75, "epoch": 0.9858525688756515, "grad_norm": 0.0057376099873585685, "kl": 0.4091796875, "learning_rate": 9.803954791481238e-07, "loss": 0.000409088796004653, "memory(GiB)": 112.52, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 662, "train_speed(iter/s)": 0.029319 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.75, "completions/mean_length": 71.12500381469727, "completions/min_length": 46.5, "epoch": 0.9873417721518988, "grad_norm": 1.4676318852748103, "kl": 0.38916015625, "learning_rate": 9.803298517020856e-07, "loss": -0.006514364387840033, "memory(GiB)": 112.52, "reward": 1.6770833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833507180214, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 663, "train_speed(iter/s)": 0.029324 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.0, "completions/mean_length": 70.22916984558105, "completions/min_length": 47.25, "epoch": 0.9888309754281459, "grad_norm": 1.5165606510497314, "kl": 0.39453125, "learning_rate": 9.8026411679735e-07, "loss": 0.009474493563175201, "memory(GiB)": 112.52, "reward": 1.6145833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6145833432674408, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 664, "train_speed(iter/s)": 0.029329 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 105.5, "completions/mean_length": 69.65625190734863, "completions/min_length": 47.5, "epoch": 0.9903201787043932, "grad_norm": 0.006046375906533138, "kl": 0.4072265625, "learning_rate": 9.801982744486228e-07, "loss": 0.000407306564738974, "memory(GiB)": 112.52, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 665, "train_speed(iter/s)": 0.029333 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 101.25, "completions/mean_length": 69.93750190734863, "completions/min_length": 50.25, "epoch": 0.9918093819806404, "grad_norm": 1.6544054406357607, "kl": 0.5126953125, "learning_rate": 9.801323246706342e-07, "loss": 0.0027588047087192535, "memory(GiB)": 112.52, "reward": 1.6145833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6145833432674408, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 666, "train_speed(iter/s)": 0.029351 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 99.0, "completions/mean_length": 69.60416793823242, "completions/min_length": 43.25, "epoch": 0.9932985852568875, "grad_norm": 2.597016185605951, "kl": 0.421875, "learning_rate": 9.800662674781381e-07, "loss": 0.013966227881610394, "memory(GiB)": 112.52, "reward": 1.760416716337204, "reward_std": 0.0876726359128952, "rewards/CineAccuracyORM/mean": 0.7604166865348816, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 667, "train_speed(iter/s)": 0.029369 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 104.75, "completions/mean_length": 71.81250190734863, "completions/min_length": 42.25, "epoch": 0.9947877885331348, "grad_norm": 1.5633340107048523, "kl": 0.40576171875, "learning_rate": 9.800001028859133e-07, "loss": -0.0007719228742644191, "memory(GiB)": 112.52, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 668, "train_speed(iter/s)": 0.029348 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 105.5, "completions/mean_length": 68.90625, "completions/min_length": 47.25, "epoch": 0.996276991809382, "grad_norm": 0.0050270402672350336, "kl": 0.41552734375, "learning_rate": 9.799338309087618e-07, "loss": 0.0004153680638410151, "memory(GiB)": 112.52, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 669, "train_speed(iter/s)": 0.029365 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 108.25, "completions/mean_length": 76.35416984558105, "completions/min_length": 48.5, "epoch": 0.9977661950856291, "grad_norm": 0.005092049417364895, "kl": 0.3837890625, "learning_rate": 9.798674515615095e-07, "loss": 0.000382447789888829, "memory(GiB)": 112.52, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 670, "train_speed(iter/s)": 0.02936 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 101.5, "completions/mean_length": 70.26041793823242, "completions/min_length": 46.5, "epoch": 0.9992553983618764, "grad_norm": 0.005861505526862896, "kl": 0.40673828125, "learning_rate": 9.798009648590073e-07, "loss": 0.00040677119977772236, "memory(GiB)": 112.52, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 671, "train_speed(iter/s)": 0.029354 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 89.0, "completions/mean_length": 64.75000190734863, "completions/min_length": 44.5, "epoch": 1.0014892032762472, "grad_norm": 1.86188619312676, "kl": 0.43896484375, "learning_rate": 9.79734370816129e-07, "loss": 0.0027262987568974495, "memory(GiB)": 112.52, "reward": 1.7187500596046448, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.7187500298023224, "rewards/CineAccuracyORM/std": 0.4177052788436413, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 672, "train_speed(iter/s)": 0.029353 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.0, "completions/mean_length": 75.41666984558105, "completions/min_length": 49.5, "epoch": 1.0029784065524945, "grad_norm": 1.6900541535248301, "kl": 0.39453125, "learning_rate": 9.796676694477732e-07, "loss": 0.004088588058948517, "memory(GiB)": 112.52, "reward": 1.6354166865348816, "reward_std": 0.07400632463395596, "rewards/CineAccuracyORM/mean": 0.6354166716337204, "rewards/CineAccuracyORM/std": 0.3717081770300865, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 673, "train_speed(iter/s)": 0.029358 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 107.0, "completions/mean_length": 69.71875095367432, "completions/min_length": 47.0, "epoch": 1.0044676098287417, "grad_norm": 1.4564231474463594, "kl": 0.4228515625, "learning_rate": 9.796008607688622e-07, "loss": 0.010697992518544197, "memory(GiB)": 112.52, "reward": 1.3958333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.3958333507180214, "rewards/CineAccuracyORM/std": 0.4870600998401642, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 674, "train_speed(iter/s)": 0.029363 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.25, "completions/mean_length": 75.53125381469727, "completions/min_length": 49.25, "epoch": 1.0059568131049887, "grad_norm": 1.4306799438614206, "kl": 0.38916015625, "learning_rate": 9.795339447943424e-07, "loss": -0.008235283195972443, "memory(GiB)": 112.52, "reward": 1.635416716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.635416679084301, "rewards/CineAccuracyORM/std": 0.4456155598163605, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 675, "train_speed(iter/s)": 0.029353 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 93.0, "completions/mean_length": 70.70833587646484, "completions/min_length": 49.5, "epoch": 1.007446016381236, "grad_norm": 2.4828926537070775, "kl": 0.4130859375, "learning_rate": 9.79466921539184e-07, "loss": -0.006217879708856344, "memory(GiB)": 112.52, "reward": 1.8854166865348816, "reward_std": 0.12028077617287636, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.1942163035273552, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 676, "train_speed(iter/s)": 0.029349 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 89.5, "completions/mean_length": 64.54166889190674, "completions/min_length": 43.25, "epoch": 1.0089352196574832, "grad_norm": 0.006939610783838574, "kl": 0.44384765625, "learning_rate": 9.793997910183813e-07, "loss": 0.00044336653081700206, "memory(GiB)": 112.52, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 677, "train_speed(iter/s)": 0.029346 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.0, "completions/mean_length": 68.67708492279053, "completions/min_length": 49.0, "epoch": 1.0104244229337305, "grad_norm": 0.005690815055013101, "kl": 0.39599609375, "learning_rate": 9.79332553246953e-07, "loss": 0.00039582973113283515, "memory(GiB)": 112.52, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 678, "train_speed(iter/s)": 0.029363 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 116.25, "completions/mean_length": 71.94791984558105, "completions/min_length": 47.0, "epoch": 1.0119136262099777, "grad_norm": 1.6115015406943873, "kl": 0.40771484375, "learning_rate": 9.79265208239941e-07, "loss": -0.007670525461435318, "memory(GiB)": 112.52, "reward": 1.572916716337204, "reward_std": 0.06803862750530243, "rewards/CineAccuracyORM/mean": 0.572916679084301, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 679, "train_speed(iter/s)": 0.029368 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 113.25, "completions/mean_length": 72.51041793823242, "completions/min_length": 43.75, "epoch": 1.013402829486225, "grad_norm": 0.6769729160940376, "kl": 0.40771484375, "learning_rate": 9.791977560124118e-07, "loss": 0.00181872199755162, "memory(GiB)": 112.52, "reward": 1.4895833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.4895833432674408, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 680, "train_speed(iter/s)": 0.029349 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 106.75, "completions/mean_length": 72.07291984558105, "completions/min_length": 47.0, "epoch": 1.014892032762472, "grad_norm": 1.4018741112656572, "kl": 0.41015625, "learning_rate": 9.791301965794558e-07, "loss": 0.006357999052852392, "memory(GiB)": 112.52, "reward": 1.541666716337204, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.5416666865348816, "rewards/CineAccuracyORM/std": 0.45633094012737274, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 681, "train_speed(iter/s)": 0.029355 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.5, "completions/mean_length": 68.44791793823242, "completions/min_length": 44.25, "epoch": 1.0163812360387192, "grad_norm": 1.15038632845978, "kl": 0.4169921875, "learning_rate": 9.790625299561871e-07, "loss": 0.003332112915813923, "memory(GiB)": 112.52, "reward": 1.7604166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 682, "train_speed(iter/s)": 0.029372 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 98.75, "completions/mean_length": 69.45833587646484, "completions/min_length": 43.75, "epoch": 1.0178704393149665, "grad_norm": 1.9865974060122484, "kl": 0.39990234375, "learning_rate": 9.789947561577443e-07, "loss": 0.000636633369140327, "memory(GiB)": 112.52, "reward": 1.6562500596046448, "reward_std": 0.11572097800672054, "rewards/CineAccuracyORM/mean": 0.6562500223517418, "rewards/CineAccuracyORM/std": 0.4674193859100342, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 683, "train_speed(iter/s)": 0.029354 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 88.25, "completions/mean_length": 65.76041984558105, "completions/min_length": 44.75, "epoch": 1.0193596425912137, "grad_norm": 0.006066046402578938, "kl": 0.44091796875, "learning_rate": 9.789268751992893e-07, "loss": 0.0004410889814607799, "memory(GiB)": 112.52, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 684, "train_speed(iter/s)": 0.029347 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.75, "completions/mean_length": 69.35416793823242, "completions/min_length": 46.25, "epoch": 1.020848845867461, "grad_norm": 2.052914522742149, "kl": 0.3896484375, "learning_rate": 9.788588870960088e-07, "loss": -0.0033580930903553963, "memory(GiB)": 112.52, "reward": 1.604166716337204, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.604166679084301, "rewards/CineAccuracyORM/std": 0.4681183323264122, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 685, "train_speed(iter/s)": 0.029331 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 95.75, "completions/mean_length": 68.54166889190674, "completions/min_length": 48.0, "epoch": 1.0223380491437082, "grad_norm": 0.005763677165270626, "kl": 0.41650390625, "learning_rate": 9.787907918631124e-07, "loss": 0.0004165091086179018, "memory(GiB)": 112.52, "reward": 1.416666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.416666679084301, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 686, "train_speed(iter/s)": 0.029315 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 92.75, "completions/mean_length": 66.13541793823242, "completions/min_length": 45.5, "epoch": 1.0238272524199554, "grad_norm": 1.1689761674949903, "kl": 0.435546875, "learning_rate": 9.787225895158347e-07, "loss": -0.00608557416126132, "memory(GiB)": 112.52, "reward": 1.8125000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 687, "train_speed(iter/s)": 0.02933 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.0, "completions/mean_length": 69.79166793823242, "completions/min_length": 45.5, "epoch": 1.0253164556962024, "grad_norm": 0.9641916687801461, "kl": 0.41552734375, "learning_rate": 9.786542800694334e-07, "loss": -0.012524085119366646, "memory(GiB)": 112.52, "reward": 1.791666716337204, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7916666865348816, "rewards/CineAccuracyORM/std": 0.33594508469104767, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 688, "train_speed(iter/s)": 0.029339 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.25, "completions/mean_length": 69.21875190734863, "completions/min_length": 47.25, "epoch": 1.0268056589724497, "grad_norm": 0.00566397557757721, "kl": 0.4462890625, "learning_rate": 9.785858635391912e-07, "loss": 0.0004456927999854088, "memory(GiB)": 112.52, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 689, "train_speed(iter/s)": 0.029295 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 106.75, "completions/mean_length": 67.04166793823242, "completions/min_length": 46.25, "epoch": 1.028294862248697, "grad_norm": 1.4317711573577578, "kl": 0.4482421875, "learning_rate": 9.785173399404138e-07, "loss": 0.00011462626571301371, "memory(GiB)": 112.52, "reward": 1.760416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166865348816, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 690, "train_speed(iter/s)": 0.029312 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 107.25, "completions/mean_length": 70.32291984558105, "completions/min_length": 48.75, "epoch": 1.0297840655249442, "grad_norm": 1.194420424590056, "kl": 0.4248046875, "learning_rate": 9.78448709288431e-07, "loss": -0.003116459120064974, "memory(GiB)": 112.52, "reward": 1.6145834028720856, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6145833507180214, "rewards/CineAccuracyORM/std": 0.4884019047021866, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 691, "train_speed(iter/s)": 0.029328 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.25, "completions/mean_length": 68.54166889190674, "completions/min_length": 41.75, "epoch": 1.0312732688011914, "grad_norm": 0.005967409605558166, "kl": 0.41064453125, "learning_rate": 9.783799715985972e-07, "loss": 0.0004102778621017933, "memory(GiB)": 112.52, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 692, "train_speed(iter/s)": 0.029333 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.75, "completions/mean_length": 70.05208587646484, "completions/min_length": 47.5, "epoch": 1.0327624720774387, "grad_norm": 0.006172168350937936, "kl": 0.41552734375, "learning_rate": 9.7831112688629e-07, "loss": 0.000415649323258549, "memory(GiB)": 112.52, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 693, "train_speed(iter/s)": 0.029316 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.0, "completions/mean_length": 67.96875381469727, "completions/min_length": 46.75, "epoch": 1.0342516753536857, "grad_norm": 0.006191761119993452, "kl": 0.44970703125, "learning_rate": 9.782421751669112e-07, "loss": 0.0004491031577344984, "memory(GiB)": 112.52, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 694, "train_speed(iter/s)": 0.02931 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 108.75, "completions/mean_length": 70.89583587646484, "completions/min_length": 44.25, "epoch": 1.035740878629933, "grad_norm": 0.0060117884247672705, "kl": 0.40966796875, "learning_rate": 9.781731164558868e-07, "loss": 0.0004092126328032464, "memory(GiB)": 112.52, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 695, "train_speed(iter/s)": 0.029292 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 95.0, "completions/mean_length": 68.4375, "completions/min_length": 50.25, "epoch": 1.0372300819061802, "grad_norm": 0.006191696263837235, "kl": 0.4375, "learning_rate": 9.781039507686665e-07, "loss": 0.0004373512929305434, "memory(GiB)": 112.52, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 696, "train_speed(iter/s)": 0.029297 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 101.0, "completions/mean_length": 69.94791984558105, "completions/min_length": 43.75, "epoch": 1.0387192851824274, "grad_norm": 0.005822846266680209, "kl": 0.43115234375, "learning_rate": 9.780346781207236e-07, "loss": 0.0004316337872296572, "memory(GiB)": 112.52, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 697, "train_speed(iter/s)": 0.029275 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 96.0, "completions/mean_length": 68.85417079925537, "completions/min_length": 45.5, "epoch": 1.0402084884586746, "grad_norm": 0.006714086561186041, "kl": 0.44921875, "learning_rate": 9.779652985275562e-07, "loss": 0.0004489232669584453, "memory(GiB)": 112.52, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 698, "train_speed(iter/s)": 0.029279 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 94.0, "completions/mean_length": 66.37500095367432, "completions/min_length": 46.25, "epoch": 1.0416976917349219, "grad_norm": 0.005978816760332062, "kl": 0.4501953125, "learning_rate": 9.778958120046852e-07, "loss": 0.0004502245574258268, "memory(GiB)": 112.52, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 699, "train_speed(iter/s)": 0.029274 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 98.5, "completions/mean_length": 71.79166984558105, "completions/min_length": 48.75, "epoch": 1.0431868950111691, "grad_norm": 0.005838385536257068, "kl": 0.419921875, "learning_rate": 9.778262185676567e-07, "loss": 0.0004207124875392765, "memory(GiB)": 112.52, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 700, "train_speed(iter/s)": 0.029269 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 102.0, "completions/mean_length": 69.52083587646484, "completions/min_length": 49.0, "epoch": 1.0446760982874161, "grad_norm": 1.6904957072833753, "kl": 0.4287109375, "learning_rate": 9.777565182320396e-07, "loss": 0.004336303565651178, "memory(GiB)": 112.52, "reward": 1.6041666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6041666716337204, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 701, "train_speed(iter/s)": 0.029263 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.25, "completions/mean_length": 73.09375190734863, "completions/min_length": 44.5, "epoch": 1.0461653015636634, "grad_norm": 1.637510763486496, "kl": 0.4208984375, "learning_rate": 9.776867110134271e-07, "loss": 0.005247925408184528, "memory(GiB)": 112.52, "reward": 1.6875000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000149011612, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 702, "train_speed(iter/s)": 0.029268 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 98.25, "completions/mean_length": 69.06250381469727, "completions/min_length": 47.25, "epoch": 1.0476545048399106, "grad_norm": 2.2699509949871497, "kl": 0.44677734375, "learning_rate": 9.776167969274366e-07, "loss": 0.010403496213257313, "memory(GiB)": 112.52, "reward": 1.5833333730697632, "reward_std": 0.07715167850255966, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.4619346410036087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 703, "train_speed(iter/s)": 0.029274 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 99.75, "completions/mean_length": 68.58333587646484, "completions/min_length": 41.25, "epoch": 1.0491437081161579, "grad_norm": 0.006678019539890669, "kl": 0.4140625, "learning_rate": 9.775467759897092e-07, "loss": 0.00041445353417657316, "memory(GiB)": 112.52, "reward": 1.3333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.3333333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 704, "train_speed(iter/s)": 0.02927 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 90.25, "completions/mean_length": 67.65625381469727, "completions/min_length": 48.75, "epoch": 1.0506329113924051, "grad_norm": 1.1327755920045026, "kl": 0.453125, "learning_rate": 9.774766482159095e-07, "loss": 0.0067716664634644985, "memory(GiB)": 112.52, "reward": 1.635416716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.635416679084301, "rewards/CineAccuracyORM/std": 0.34448449313640594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 705, "train_speed(iter/s)": 0.029264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 99.5, "completions/mean_length": 68.66666984558105, "completions/min_length": 44.25, "epoch": 1.0521221146686524, "grad_norm": 1.288823643022014, "kl": 0.44287109375, "learning_rate": 9.77406413621727e-07, "loss": 0.015593113377690315, "memory(GiB)": 112.52, "reward": 1.572916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.572916679084301, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 706, "train_speed(iter/s)": 0.029249 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 92.0, "completions/mean_length": 67.64583587646484, "completions/min_length": 45.0, "epoch": 1.0536113179448994, "grad_norm": 0.006501554746655152, "kl": 0.45361328125, "learning_rate": 9.77336072222874e-07, "loss": 0.0004533611354418099, "memory(GiB)": 112.52, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 707, "train_speed(iter/s)": 0.029264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 95.75, "completions/mean_length": 67.94791793823242, "completions/min_length": 46.25, "epoch": 1.0551005212211466, "grad_norm": 0.006339437758457424, "kl": 0.43359375, "learning_rate": 9.772656240350875e-07, "loss": 0.0004339042061474174, "memory(GiB)": 112.52, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 708, "train_speed(iter/s)": 0.029264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 92.5, "completions/mean_length": 68.37500381469727, "completions/min_length": 47.75, "epoch": 1.0565897244973939, "grad_norm": 0.007442224045514388, "kl": 0.4619140625, "learning_rate": 9.771950690741276e-07, "loss": 0.0004617433005478233, "memory(GiB)": 112.52, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 709, "train_speed(iter/s)": 0.029259 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 99.0, "completions/mean_length": 65.70833492279053, "completions/min_length": 47.75, "epoch": 1.058078927773641, "grad_norm": 0.007012232341455376, "kl": 0.4296875, "learning_rate": 9.77124407355779e-07, "loss": 0.00042986200423911214, "memory(GiB)": 112.52, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 710, "train_speed(iter/s)": 0.02926 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 91.75, "completions/mean_length": 63.19791793823242, "completions/min_length": 38.75, "epoch": 1.0595681310498883, "grad_norm": 0.8329447987003084, "kl": 0.47216796875, "learning_rate": 9.770536388958505e-07, "loss": 0.004542951937764883, "memory(GiB)": 112.52, "reward": 1.6562500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500074505806, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 711, "train_speed(iter/s)": 0.029253 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 94.75, "completions/mean_length": 66.34375286102295, "completions/min_length": 40.25, "epoch": 1.0610573343261356, "grad_norm": 1.1849618557486772, "kl": 0.43017578125, "learning_rate": 9.769827637101736e-07, "loss": 0.006189762614667416, "memory(GiB)": 112.52, "reward": 1.6458333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6458333507180214, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 712, "train_speed(iter/s)": 0.029258 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 94.5, "completions/mean_length": 67.23958587646484, "completions/min_length": 45.25, "epoch": 1.0625465376023828, "grad_norm": 2.4319825337121697, "kl": 0.447265625, "learning_rate": 9.769117818146047e-07, "loss": -0.0036237197928130627, "memory(GiB)": 112.52, "reward": 1.6458334028720856, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.645833358168602, "rewards/CineAccuracyORM/std": 0.48409245908260345, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 713, "train_speed(iter/s)": 0.029253 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 102.25, "completions/mean_length": 70.37500381469727, "completions/min_length": 44.0, "epoch": 1.0640357408786298, "grad_norm": 0.006224491428081048, "kl": 0.4423828125, "learning_rate": 9.768406932250239e-07, "loss": 0.00044234748929739, "memory(GiB)": 112.52, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 714, "train_speed(iter/s)": 0.029257 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 104.0, "completions/mean_length": 71.30208778381348, "completions/min_length": 45.0, "epoch": 1.065524944154877, "grad_norm": 1.236319097141928, "kl": 0.41796875, "learning_rate": 9.76769497957335e-07, "loss": 0.00796523503959179, "memory(GiB)": 112.52, "reward": 1.6458333432674408, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6458333358168602, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 715, "train_speed(iter/s)": 0.029251 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 99.5, "completions/mean_length": 65.51041984558105, "completions/min_length": 39.25, "epoch": 1.0670141474311243, "grad_norm": 1.0227423715739947, "kl": 0.4736328125, "learning_rate": 9.766981960274652e-07, "loss": -0.0034252568148076534, "memory(GiB)": 112.52, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833507180214, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 716, "train_speed(iter/s)": 0.029257 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 92.75, "completions/mean_length": 62.66666793823242, "completions/min_length": 39.75, "epoch": 1.0685033507073716, "grad_norm": 0.006415605650621734, "kl": 0.44482421875, "learning_rate": 9.766267874513667e-07, "loss": 0.0004450420383363962, "memory(GiB)": 112.52, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 717, "train_speed(iter/s)": 0.029269 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 96.0, "completions/mean_length": 67.86458778381348, "completions/min_length": 40.5, "epoch": 1.0699925539836188, "grad_norm": 1.3397998197171184, "kl": 0.45263671875, "learning_rate": 9.765552722450147e-07, "loss": 0.003528343513607979, "memory(GiB)": 112.52, "reward": 1.6145833730697632, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.6145833488553762, "rewards/CineAccuracyORM/std": 0.30704472959041595, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 718, "train_speed(iter/s)": 0.029253 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 94.25, "completions/mean_length": 68.07291793823242, "completions/min_length": 49.25, "epoch": 1.071481757259866, "grad_norm": 1.8155922421424775, "kl": 0.4482421875, "learning_rate": 9.764836504244085e-07, "loss": 0.00649466086179018, "memory(GiB)": 112.52, "reward": 1.5937500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500149011612, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 719, "train_speed(iter/s)": 0.029248 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 89.5, "completions/mean_length": 64.36458492279053, "completions/min_length": 45.5, "epoch": 1.072970960536113, "grad_norm": 1.2257164334368675, "kl": 0.44580078125, "learning_rate": 9.76411922005571e-07, "loss": -0.0054454561322927475, "memory(GiB)": 112.52, "reward": 1.822916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 720, "train_speed(iter/s)": 0.029255 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 98.75, "completions/mean_length": 65.17708492279053, "completions/min_length": 41.25, "epoch": 1.0744601638123603, "grad_norm": 0.005873154391546992, "kl": 0.455078125, "learning_rate": 9.763400870045495e-07, "loss": 0.0004554353072308004, "memory(GiB)": 112.52, "reward": 1.5000000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 721, "train_speed(iter/s)": 0.029261 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 108.25, "completions/mean_length": 64.25000190734863, "completions/min_length": 46.0, "epoch": 1.0759493670886076, "grad_norm": 1.6564671168257716, "kl": 0.4375, "learning_rate": 9.762681454374146e-07, "loss": -0.0059361765161156654, "memory(GiB)": 112.52, "reward": 1.6458333730697632, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.6458333507180214, "rewards/CineAccuracyORM/std": 0.44130611419677734, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 722, "train_speed(iter/s)": 0.029255 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.5, "completions/mean_length": 66.25, "completions/min_length": 41.75, "epoch": 1.0774385703648548, "grad_norm": 0.00598213806626121, "kl": 0.44677734375, "learning_rate": 9.761960973202612e-07, "loss": 0.00044678105041384697, "memory(GiB)": 112.52, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 723, "train_speed(iter/s)": 0.029272 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 98.0, "completions/mean_length": 68.18750381469727, "completions/min_length": 46.5, "epoch": 1.078927773641102, "grad_norm": 0.0059931057765278605, "kl": 0.40185546875, "learning_rate": 9.761239426692076e-07, "loss": 0.0004025431699119508, "memory(GiB)": 112.52, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 724, "train_speed(iter/s)": 0.029288 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 95.0, "completions/mean_length": 64.56250381469727, "completions/min_length": 42.75, "epoch": 1.0804169769173493, "grad_norm": 1.5997346273321114, "kl": 0.46484375, "learning_rate": 9.760516815003964e-07, "loss": 0.0026706333737820387, "memory(GiB)": 112.52, "reward": 1.6562500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500074505806, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 725, "train_speed(iter/s)": 0.029283 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 97.75, "completions/mean_length": 65.40625190734863, "completions/min_length": 42.75, "epoch": 1.0819061801935965, "grad_norm": 1.6131343297594947, "kl": 0.4482421875, "learning_rate": 9.759793138299933e-07, "loss": -0.0028339242562651634, "memory(GiB)": 112.52, "reward": 1.4895833730697632, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.4895833386108279, "rewards/CineAccuracyORM/std": 0.2819983549416065, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 726, "train_speed(iter/s)": 0.029277 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 94.25, "completions/mean_length": 65.77083587646484, "completions/min_length": 43.25, "epoch": 1.0833953834698435, "grad_norm": 0.005693146832569588, "kl": 0.4306640625, "learning_rate": 9.759068396741884e-07, "loss": 0.00043142231879755855, "memory(GiB)": 112.52, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 727, "train_speed(iter/s)": 0.029283 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 97.75, "completions/mean_length": 64.39583396911621, "completions/min_length": 41.0, "epoch": 1.0848845867460908, "grad_norm": 0.0058959730358347055, "kl": 0.45263671875, "learning_rate": 9.75834259049196e-07, "loss": 0.0004529778379946947, "memory(GiB)": 112.52, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 728, "train_speed(iter/s)": 0.029299 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 92.75, "completions/mean_length": 67.65625095367432, "completions/min_length": 45.5, "epoch": 1.086373790022338, "grad_norm": 1.739035788199708, "kl": 0.46240234375, "learning_rate": 9.757615719712532e-07, "loss": 0.0074521745555102825, "memory(GiB)": 112.52, "reward": 1.6770833730697632, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.6770833432674408, "rewards/CineAccuracyORM/std": 0.3182126581668854, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 729, "train_speed(iter/s)": 0.029278 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 89.75, "completions/mean_length": 63.645835876464844, "completions/min_length": 42.0, "epoch": 1.0878629932985853, "grad_norm": 2.022507803166956, "kl": 0.4521484375, "learning_rate": 9.756887784566215e-07, "loss": 0.0036213784478604794, "memory(GiB)": 112.52, "reward": 1.5520833730697632, "reward_std": 0.07400632463395596, "rewards/CineAccuracyORM/mean": 0.5520833507180214, "rewards/CineAccuracyORM/std": 0.4595789238810539, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 730, "train_speed(iter/s)": 0.029285 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 91.5, "completions/mean_length": 64.29166889190674, "completions/min_length": 46.75, "epoch": 1.0893521965748325, "grad_norm": 0.006211127036604458, "kl": 0.435546875, "learning_rate": 9.756158785215866e-07, "loss": 0.0004356003482826054, "memory(GiB)": 112.52, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 731, "train_speed(iter/s)": 0.02928 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 108.0, "completions/mean_length": 67.88541793823242, "completions/min_length": 46.0, "epoch": 1.0908413998510798, "grad_norm": 0.005632490794808218, "kl": 0.4111328125, "learning_rate": 9.755428721824571e-07, "loss": 0.00041133942431770265, "memory(GiB)": 112.52, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 732, "train_speed(iter/s)": 0.029285 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 87.75, "completions/mean_length": 63.125, "completions/min_length": 41.5, "epoch": 1.0923306031273268, "grad_norm": 1.520187601743356, "kl": 0.4326171875, "learning_rate": 9.75469759455566e-07, "loss": 0.0012383114080876112, "memory(GiB)": 112.52, "reward": 1.5208333730697632, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.5208333432674408, "rewards/CineAccuracyORM/std": 0.49164988845586777, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 733, "train_speed(iter/s)": 0.029279 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.25, "completions/mean_length": 66.66666793823242, "completions/min_length": 43.5, "epoch": 1.093819806403574, "grad_norm": 1.3841025799191002, "kl": 0.41796875, "learning_rate": 9.753965403572702e-07, "loss": -0.004391858354210854, "memory(GiB)": 112.52, "reward": 1.5937500596046448, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.5937500149011612, "rewards/CineAccuracyORM/std": 0.43859851360321045, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 734, "train_speed(iter/s)": 0.029295 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 97.25, "completions/mean_length": 63.52083492279053, "completions/min_length": 43.5, "epoch": 1.0953090096798213, "grad_norm": 1.1652324393816222, "kl": 0.4462890625, "learning_rate": 9.7532321490395e-07, "loss": 0.0034000021405518055, "memory(GiB)": 112.52, "reward": 1.8750000298023224, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.8750000149011612, "rewards/CineAccuracyORM/std": 0.2155592292547226, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 735, "train_speed(iter/s)": 0.029311 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 99.75, "completions/mean_length": 65.76041793823242, "completions/min_length": 43.75, "epoch": 1.0967982129560685, "grad_norm": 0.005526831314731009, "kl": 0.4462890625, "learning_rate": 9.752497831120094e-07, "loss": 0.00044573561172001064, "memory(GiB)": 112.52, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 736, "train_speed(iter/s)": 0.029326 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.0, "completions/mean_length": 66.52083492279053, "completions/min_length": 44.5, "epoch": 1.0982874162323157, "grad_norm": 1.4765632754900533, "kl": 0.44677734375, "learning_rate": 9.751762449978766e-07, "loss": 0.02163606509566307, "memory(GiB)": 112.52, "reward": 1.6562500596046448, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.6562500149011612, "rewards/CineAccuracyORM/std": 0.3735327273607254, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 737, "train_speed(iter/s)": 0.02932 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 96.5, "completions/mean_length": 63.30208492279053, "completions/min_length": 42.0, "epoch": 1.099776619508563, "grad_norm": 1.5044060067086864, "kl": 0.474609375, "learning_rate": 9.75102600578004e-07, "loss": 0.0026131384074687958, "memory(GiB)": 112.52, "reward": 1.760416716337204, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.7604166865348816, "rewards/CineAccuracyORM/std": 0.4023842103779316, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 738, "train_speed(iter/s)": 0.029323 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 92.5, "completions/mean_length": 63.82291889190674, "completions/min_length": 38.75, "epoch": 1.1012658227848102, "grad_norm": 4.687507858376988, "kl": 0.46484375, "learning_rate": 9.750288498688664e-07, "loss": 3.9443373680114746e-05, "memory(GiB)": 112.52, "reward": 1.5625000596046448, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.5625000102445483, "rewards/CineAccuracyORM/std": 0.2986612282693386, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 739, "train_speed(iter/s)": 0.029316 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 92.25, "completions/mean_length": 59.13541793823242, "completions/min_length": 39.0, "epoch": 1.1027550260610572, "grad_norm": 0.006730048202212281, "kl": 0.44970703125, "learning_rate": 9.749549928869635e-07, "loss": 0.0004496354958973825, "memory(GiB)": 112.52, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 740, "train_speed(iter/s)": 0.029333 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 90.5, "completions/mean_length": 59.83333492279053, "completions/min_length": 42.0, "epoch": 1.1042442293373045, "grad_norm": 1.283238116724052, "kl": 0.46630859375, "learning_rate": 9.748810296488189e-07, "loss": 0.010365823283791542, "memory(GiB)": 112.52, "reward": 1.6979166865348816, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.3546011596918106, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 741, "train_speed(iter/s)": 0.029338 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 92.25, "completions/mean_length": 64.47916984558105, "completions/min_length": 42.75, "epoch": 1.1057334326135517, "grad_norm": 1.358476656331574, "kl": 0.42626953125, "learning_rate": 9.74806960170979e-07, "loss": -0.0012757668737322092, "memory(GiB)": 112.52, "reward": 1.7812500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7812500149011612, "rewards/CineAccuracyORM/std": 0.34448449313640594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 742, "train_speed(iter/s)": 0.029327 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.75, "completions/mean_length": 60.64583492279053, "completions/min_length": 39.5, "epoch": 1.107222635889799, "grad_norm": 0.005886207430708861, "kl": 0.4560546875, "learning_rate": 9.747327844700146e-07, "loss": 0.0004554603947326541, "memory(GiB)": 112.52, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 743, "train_speed(iter/s)": 0.029318 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 94.0, "completions/mean_length": 62.10416889190674, "completions/min_length": 36.0, "epoch": 1.1087118391660462, "grad_norm": 0.7902871670493362, "kl": 0.44580078125, "learning_rate": 9.746585025625205e-07, "loss": 0.005539005622267723, "memory(GiB)": 112.52, "reward": 1.6770833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 744, "train_speed(iter/s)": 0.029323 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 98.75, "completions/mean_length": 61.07291793823242, "completions/min_length": 39.75, "epoch": 1.1102010424422935, "grad_norm": 0.006010492895440083, "kl": 0.45703125, "learning_rate": 9.745841144651146e-07, "loss": 0.0004569964949041605, "memory(GiB)": 112.52, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 745, "train_speed(iter/s)": 0.029316 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 83.5, "completions/mean_length": 58.18750190734863, "completions/min_length": 38.25, "epoch": 1.1116902457185405, "grad_norm": 0.006491301816763813, "kl": 0.470703125, "learning_rate": 9.745096201944389e-07, "loss": 0.00047130556777119637, "memory(GiB)": 112.52, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 746, "train_speed(iter/s)": 0.029333 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 98.0, "completions/mean_length": 61.145835876464844, "completions/min_length": 39.5, "epoch": 1.1131794489947877, "grad_norm": 1.2204951318763833, "kl": 0.47705078125, "learning_rate": 9.744350197671597e-07, "loss": -0.002255021594464779, "memory(GiB)": 112.52, "reward": 1.510416716337204, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.510416679084301, "rewards/CineAccuracyORM/std": 0.43859851360321045, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 747, "train_speed(iter/s)": 0.029338 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 92.25, "completions/mean_length": 62.468750953674316, "completions/min_length": 37.0, "epoch": 1.114668652271035, "grad_norm": 1.831504847771253, "kl": 0.46875, "learning_rate": 9.743603131999655e-07, "loss": -0.013274955563247204, "memory(GiB)": 112.53, "reward": 1.635416716337204, "reward_std": 0.07400632463395596, "rewards/CineAccuracyORM/mean": 0.6354166865348816, "rewards/CineAccuracyORM/std": 0.4845366030931473, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 748, "train_speed(iter/s)": 0.029331 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 95.0, "completions/mean_length": 62.364585876464844, "completions/min_length": 39.0, "epoch": 1.1161578555472822, "grad_norm": 1.3422535175345012, "kl": 0.4423828125, "learning_rate": 9.742855005095704e-07, "loss": 0.004486247897148132, "memory(GiB)": 112.53, "reward": 1.6041666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6041666716337204, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 749, "train_speed(iter/s)": 0.029335 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 90.75, "completions/mean_length": 59.770835876464844, "completions/min_length": 37.75, "epoch": 1.1176470588235294, "grad_norm": 1.2036151353431512, "kl": 0.47998046875, "learning_rate": 9.742105817127114e-07, "loss": -0.010957407765090466, "memory(GiB)": 112.53, "reward": 1.8854166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.20484384894371033, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 750, "train_speed(iter/s)": 0.029342 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 93.0, "completions/mean_length": 63.62500190734863, "completions/min_length": 44.0, "epoch": 1.1191362620997767, "grad_norm": 1.2781202415239294, "kl": 0.44677734375, "learning_rate": 9.741355568261484e-07, "loss": -0.012312627397477627, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.08625819534063339, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 751, "train_speed(iter/s)": 0.029347 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 89.25, "completions/mean_length": 59.08333492279053, "completions/min_length": 37.5, "epoch": 1.120625465376024, "grad_norm": 1.6336050115625596, "kl": 0.50732421875, "learning_rate": 9.740604258666668e-07, "loss": -0.00393360061571002, "memory(GiB)": 112.53, "reward": 1.8750000298023224, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.8750000149011612, "rewards/CineAccuracyORM/std": 0.2155592292547226, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 752, "train_speed(iter/s)": 0.029353 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 94.0, "completions/mean_length": 60.96875, "completions/min_length": 42.25, "epoch": 1.122114668652271, "grad_norm": 1.252350968873114, "kl": 0.49755859375, "learning_rate": 9.73985188851074e-07, "loss": 0.003685562638565898, "memory(GiB)": 112.53, "reward": 1.8125000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 753, "train_speed(iter/s)": 0.029337 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 92.25, "completions/mean_length": 60.07291793823242, "completions/min_length": 37.5, "epoch": 1.1236038719285182, "grad_norm": 1.660511121725767, "kl": 0.4482421875, "learning_rate": 9.739098457962024e-07, "loss": 0.00382750341668725, "memory(GiB)": 112.53, "reward": 1.6562500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500074505806, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 754, "train_speed(iter/s)": 0.029342 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 93.0, "completions/mean_length": 59.09375190734863, "completions/min_length": 33.5, "epoch": 1.1250930752047654, "grad_norm": 1.0203733639445796, "kl": 0.48876953125, "learning_rate": 9.738343967189077e-07, "loss": 0.003941708710044622, "memory(GiB)": 112.53, "reward": 1.7395834028720856, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.739583358168602, "rewards/CineAccuracyORM/std": 0.4121886007487774, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 755, "train_speed(iter/s)": 0.029326 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 89.5, "completions/mean_length": 60.79166793823242, "completions/min_length": 38.25, "epoch": 1.1265822784810127, "grad_norm": 0.8606265165672892, "kl": 0.4716796875, "learning_rate": 9.737588416360692e-07, "loss": 0.0053321681916713715, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833507180214, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 756, "train_speed(iter/s)": 0.029342 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 90.75, "completions/mean_length": 60.46875190734863, "completions/min_length": 40.0, "epoch": 1.12807148175726, "grad_norm": 1.2213491956911933, "kl": 0.453125, "learning_rate": 9.736831805645895e-07, "loss": 0.0058359685353934765, "memory(GiB)": 112.53, "reward": 1.7812500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7812500074505806, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 757, "train_speed(iter/s)": 0.029338 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 85.25, "completions/mean_length": 59.437500953674316, "completions/min_length": 38.0, "epoch": 1.1295606850335072, "grad_norm": 1.5331636014950287, "kl": 0.49755859375, "learning_rate": 9.73607413521396e-07, "loss": 0.0021944097243249416, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.7708333507180214, "rewards/CineAccuracyORM/std": 0.27512967213988304, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 758, "train_speed(iter/s)": 0.029354 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 98.0, "completions/mean_length": 61.48958492279053, "completions/min_length": 40.0, "epoch": 1.1310498883097542, "grad_norm": 0.006273402282125467, "kl": 0.4853515625, "learning_rate": 9.73531540523439e-07, "loss": 0.00048520154086872935, "memory(GiB)": 112.53, "reward": 1.5000000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 759, "train_speed(iter/s)": 0.029357 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 87.0, "completions/mean_length": 58.19791889190674, "completions/min_length": 35.25, "epoch": 1.1325390915860014, "grad_norm": 0.006475806617181546, "kl": 0.498046875, "learning_rate": 9.734555615876923e-07, "loss": 0.000498082663398236, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 760, "train_speed(iter/s)": 0.02935 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 94.5, "completions/mean_length": 58.093750953674316, "completions/min_length": 38.0, "epoch": 1.1340282948622487, "grad_norm": 1.1898058973117898, "kl": 0.4619140625, "learning_rate": 9.733794767311543e-07, "loss": -0.0066871545277535915, "memory(GiB)": 112.53, "reward": 1.5937500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500074505806, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 761, "train_speed(iter/s)": 0.029345 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 87.0, "completions/mean_length": 61.44791889190674, "completions/min_length": 39.0, "epoch": 1.135517498138496, "grad_norm": 0.006561330186678657, "kl": 0.4677734375, "learning_rate": 9.733032859708465e-07, "loss": 0.0004677774559240788, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 762, "train_speed(iter/s)": 0.02934 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 90.75, "completions/mean_length": 61.72916793823242, "completions/min_length": 42.0, "epoch": 1.1370067014147431, "grad_norm": 1.9621883181579551, "kl": 0.4775390625, "learning_rate": 9.732269893238142e-07, "loss": -0.0026870532892644405, "memory(GiB)": 112.53, "reward": 1.760416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166865348816, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 763, "train_speed(iter/s)": 0.029346 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.75, "completions/mean_length": 60.59375190734863, "completions/min_length": 40.75, "epoch": 1.1384959046909904, "grad_norm": 0.006142671413081264, "kl": 0.48828125, "learning_rate": 9.731505868071262e-07, "loss": 0.0004885430680587888, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 764, "train_speed(iter/s)": 0.02935 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 90.25, "completions/mean_length": 59.60416793823242, "completions/min_length": 35.75, "epoch": 1.1399851079672376, "grad_norm": 0.0062371667357103465, "kl": 0.4453125, "learning_rate": 9.730740784378752e-07, "loss": 0.0004457782197277993, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 765, "train_speed(iter/s)": 0.029356 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 86.25, "completions/mean_length": 55.73958492279053, "completions/min_length": 36.0, "epoch": 1.1414743112434846, "grad_norm": 1.734078429181459, "kl": 0.5146484375, "learning_rate": 9.729974642331775e-07, "loss": -0.0027306005358695984, "memory(GiB)": 112.53, "reward": 1.6562500596046448, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.6562500149011612, "rewards/CineAccuracyORM/std": 0.4785975143313408, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 766, "train_speed(iter/s)": 0.029352 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 96.75, "completions/mean_length": 64.07291793823242, "completions/min_length": 39.0, "epoch": 1.1429635145197319, "grad_norm": 1.2328640386811902, "kl": 0.45751953125, "learning_rate": 9.729207442101735e-07, "loss": 0.006636315491050482, "memory(GiB)": 112.53, "reward": 1.5416667461395264, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.5416666865348816, "rewards/CineAccuracyORM/std": 0.48409245908260345, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 767, "train_speed(iter/s)": 0.029356 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 90.5, "completions/mean_length": 60.89583492279053, "completions/min_length": 37.25, "epoch": 1.1444527177959791, "grad_norm": 0.006377655582085902, "kl": 0.47705078125, "learning_rate": 9.728439183860265e-07, "loss": 0.00047767904470674694, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 768, "train_speed(iter/s)": 0.029363 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 93.25, "completions/mean_length": 58.843750953674316, "completions/min_length": 35.5, "epoch": 1.1459419210722264, "grad_norm": 0.006877905310823571, "kl": 0.47314453125, "learning_rate": 9.727669867779239e-07, "loss": 0.00047287807683460414, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 769, "train_speed(iter/s)": 0.029378 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 82.0, "completions/mean_length": 57.70833396911621, "completions/min_length": 41.25, "epoch": 1.1474311243484736, "grad_norm": 0.006509109204660749, "kl": 0.46728515625, "learning_rate": 9.726899494030766e-07, "loss": 0.0004672952345572412, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 770, "train_speed(iter/s)": 0.029364 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 85.75, "completions/mean_length": 61.00000190734863, "completions/min_length": 40.25, "epoch": 1.1489203276247208, "grad_norm": 1.3933398486103175, "kl": 0.46044921875, "learning_rate": 9.726128062787197e-07, "loss": -0.0014789080014452338, "memory(GiB)": 112.53, "reward": 1.6041666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6041666716337204, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 771, "train_speed(iter/s)": 0.029359 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 85.5, "completions/mean_length": 60.54166889190674, "completions/min_length": 40.0, "epoch": 1.1504095309009679, "grad_norm": 1.0139732235026437, "kl": 0.4814453125, "learning_rate": 9.725355574221114e-07, "loss": -0.005041593685746193, "memory(GiB)": 112.53, "reward": 1.5937500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500223517418, "rewards/CineAccuracyORM/std": 0.47723397612571716, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 772, "train_speed(iter/s)": 0.029355 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 101.5, "completions/mean_length": 61.968750953674316, "completions/min_length": 41.0, "epoch": 1.1518987341772151, "grad_norm": 0.9065393624847999, "kl": 0.4755859375, "learning_rate": 9.724582028505335e-07, "loss": -0.0012578102760016918, "memory(GiB)": 112.53, "reward": 1.6562500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500223517418, "rewards/CineAccuracyORM/std": 0.4121886007487774, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 773, "train_speed(iter/s)": 0.029359 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 98.0, "completions/mean_length": 61.66666793823242, "completions/min_length": 40.75, "epoch": 1.1533879374534624, "grad_norm": 1.2732490093049134, "kl": 0.45361328125, "learning_rate": 9.723807425812918e-07, "loss": 0.00906978640705347, "memory(GiB)": 112.53, "reward": 1.9895833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9895833432674408, "rewards/CineAccuracyORM/std": 0.05103103443980217, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 774, "train_speed(iter/s)": 0.029375 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 102.75, "completions/mean_length": 64.39583587646484, "completions/min_length": 43.0, "epoch": 1.1548771407297096, "grad_norm": 1.5622339921314956, "kl": 0.462890625, "learning_rate": 9.723031766317156e-07, "loss": -0.0006427179905585945, "memory(GiB)": 112.53, "reward": 1.635416716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.635416679084301, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 775, "train_speed(iter/s)": 0.029381 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.5, "completions/mean_length": 62.82291889190674, "completions/min_length": 42.5, "epoch": 1.1563663440059568, "grad_norm": 1.6070127841170176, "kl": 0.4921875, "learning_rate": 9.722255050191578e-07, "loss": 0.004471511580049992, "memory(GiB)": 112.53, "reward": 1.541666716337204, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.5416666865348816, "rewards/CineAccuracyORM/std": 0.48409245908260345, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 776, "train_speed(iter/s)": 0.029375 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 92.25, "completions/mean_length": 59.82291793823242, "completions/min_length": 39.0, "epoch": 1.157855547282204, "grad_norm": 1.0422936097702011, "kl": 0.4736328125, "learning_rate": 9.721477277609951e-07, "loss": 0.005552046932280064, "memory(GiB)": 112.53, "reward": 1.6562500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500074505806, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 777, "train_speed(iter/s)": 0.02939 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 85.75, "completions/mean_length": 55.81250190734863, "completions/min_length": 36.5, "epoch": 1.1593447505584513, "grad_norm": 2.4641645952617615, "kl": 0.5380859375, "learning_rate": 9.720698448746277e-07, "loss": 0.006839222274720669, "memory(GiB)": 112.53, "reward": 1.3437500298023224, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.3437500149011612, "rewards/CineAccuracyORM/std": 0.3735327273607254, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 778, "train_speed(iter/s)": 0.029386 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 98.75, "completions/mean_length": 60.37500190734863, "completions/min_length": 38.75, "epoch": 1.1608339538346983, "grad_norm": 1.82762302815976, "kl": 0.4384765625, "learning_rate": 9.719918563774792e-07, "loss": 0.005848039872944355, "memory(GiB)": 112.53, "reward": 1.572916716337204, "reward_std": 0.06803862750530243, "rewards/CineAccuracyORM/mean": 0.572916679084301, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 779, "train_speed(iter/s)": 0.029391 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 87.5, "completions/mean_length": 58.20833492279053, "completions/min_length": 38.75, "epoch": 1.1623231571109456, "grad_norm": 1.3982058476247872, "kl": 0.490234375, "learning_rate": 9.719137622869971e-07, "loss": 0.0029087234288454056, "memory(GiB)": 112.53, "reward": 1.5208333730697632, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.5208333414047956, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 780, "train_speed(iter/s)": 0.029396 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 82.0, "completions/mean_length": 56.29166793823242, "completions/min_length": 38.25, "epoch": 1.1638123603871928, "grad_norm": 2.0897804228446666, "kl": 0.48681640625, "learning_rate": 9.71835562620653e-07, "loss": 0.012540195137262344, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.6770833432674408, "rewards/CineAccuracyORM/std": 0.3699222281575203, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 781, "train_speed(iter/s)": 0.029405 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 95.0, "completions/mean_length": 59.86458492279053, "completions/min_length": 37.25, "epoch": 1.16530156366344, "grad_norm": 0.006215434804862654, "kl": 0.48046875, "learning_rate": 9.71757257395941e-07, "loss": 0.00048059792607091367, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 782, "train_speed(iter/s)": 0.029399 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 97.0, "completions/mean_length": 60.85416889190674, "completions/min_length": 39.75, "epoch": 1.1667907669396873, "grad_norm": 1.8715266697769284, "kl": 0.47314453125, "learning_rate": 9.716788466303798e-07, "loss": 0.007628454826772213, "memory(GiB)": 112.53, "reward": 1.885416716337204, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.8854166865348816, "rewards/CineAccuracyORM/std": 0.24199935421347618, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 783, "train_speed(iter/s)": 0.029414 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 85.75, "completions/mean_length": 54.94791793823242, "completions/min_length": 38.0, "epoch": 1.1682799702159345, "grad_norm": 1.2916474816612962, "kl": 0.52197265625, "learning_rate": 9.71600330341511e-07, "loss": 0.0012122858315706253, "memory(GiB)": 112.53, "reward": 1.8645833432674408, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.8645833432674408, "rewards/CineAccuracyORM/std": 0.22665787488222122, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 784, "train_speed(iter/s)": 0.029424 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 80.5, "completions/mean_length": 55.031250953674316, "completions/min_length": 36.25, "epoch": 1.1697691734921816, "grad_norm": 1.5521427365617646, "kl": 0.48828125, "learning_rate": 9.715217085469007e-07, "loss": 0.001346539705991745, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.7500000298023224, "rewards/CineAccuracyORM/std": 0.40787915512919426, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 785, "train_speed(iter/s)": 0.029429 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 86.25, "completions/mean_length": 57.81250190734863, "completions/min_length": 38.0, "epoch": 1.1712583767684288, "grad_norm": 0.006432802760521936, "kl": 0.4951171875, "learning_rate": 9.714429812641376e-07, "loss": 0.0004952105227857828, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 786, "train_speed(iter/s)": 0.029444 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 84.0, "completions/mean_length": 58.55208492279053, "completions/min_length": 40.75, "epoch": 1.172747580044676, "grad_norm": 1.3709720441033806, "kl": 0.46240234375, "learning_rate": 9.713641485108347e-07, "loss": -0.002960743848234415, "memory(GiB)": 112.53, "reward": 1.447916716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.447916679084301, "rewards/CineAccuracyORM/std": 0.4884019047021866, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 787, "train_speed(iter/s)": 0.029459 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 80.75, "completions/mean_length": 57.66666793823242, "completions/min_length": 36.5, "epoch": 1.1742367833209233, "grad_norm": 1.333774941702466, "kl": 0.490234375, "learning_rate": 9.71285210304628e-07, "loss": -0.0006777780945412815, "memory(GiB)": 112.53, "reward": 1.6875000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000149011612, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 788, "train_speed(iter/s)": 0.029474 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 81.5, "completions/mean_length": 58.17708492279053, "completions/min_length": 39.75, "epoch": 1.1757259865971705, "grad_norm": 2.069588993631242, "kl": 0.490234375, "learning_rate": 9.712061666631777e-07, "loss": -0.003677036613225937, "memory(GiB)": 112.53, "reward": 1.6979166865348816, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.3546011596918106, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 789, "train_speed(iter/s)": 0.029469 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 86.0, "completions/mean_length": 57.23958492279053, "completions/min_length": 37.5, "epoch": 1.1772151898734178, "grad_norm": 2.0908009478171743, "kl": 0.47119140625, "learning_rate": 9.711270176041674e-07, "loss": 0.0017317150486633182, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.11258215829730034, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.3582116588950157, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 790, "train_speed(iter/s)": 0.029465 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 99.0, "completions/mean_length": 59.04166793823242, "completions/min_length": 33.5, "epoch": 1.178704393149665, "grad_norm": 1.457835744912032, "kl": 0.525390625, "learning_rate": 9.710477631453043e-07, "loss": 0.00543229840695858, "memory(GiB)": 112.53, "reward": 1.8854166865348816, "reward_std": 0.06803862750530243, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.20484384894371033, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 791, "train_speed(iter/s)": 0.029449 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 97.5, "completions/mean_length": 59.229169845581055, "completions/min_length": 36.75, "epoch": 1.1801935964259123, "grad_norm": 0.006527043770751839, "kl": 0.4931640625, "learning_rate": 9.709684033043186e-07, "loss": 0.0004931283183395863, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 792, "train_speed(iter/s)": 0.029443 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 90.0, "completions/mean_length": 60.343750953674316, "completions/min_length": 39.25, "epoch": 1.1816827997021593, "grad_norm": 0.006695903574914143, "kl": 0.47265625, "learning_rate": 9.70888938098965e-07, "loss": 0.00047240275307558477, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 793, "train_speed(iter/s)": 0.029447 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 82.75, "completions/mean_length": 56.38541889190674, "completions/min_length": 38.75, "epoch": 1.1831720029784065, "grad_norm": 1.1277437317316659, "kl": 0.50244140625, "learning_rate": 9.708093675470212e-07, "loss": -0.003481445601209998, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 794, "train_speed(iter/s)": 0.029452 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 80.5, "completions/mean_length": 58.000000953674316, "completions/min_length": 38.25, "epoch": 1.1846612062546538, "grad_norm": 1.4094152995689926, "kl": 0.5048828125, "learning_rate": 9.707296916662887e-07, "loss": -0.003771485760807991, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 795, "train_speed(iter/s)": 0.029468 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 85.5, "completions/mean_length": 57.05208396911621, "completions/min_length": 36.25, "epoch": 1.186150409530901, "grad_norm": 2.0944635151481603, "kl": 0.5263671875, "learning_rate": 9.706499104745924e-07, "loss": 0.0008514676010236144, "memory(GiB)": 112.53, "reward": 1.6145834028720856, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6145833507180214, "rewards/CineAccuracyORM/std": 0.4884019047021866, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 796, "train_speed(iter/s)": 0.029452 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 85.5, "completions/mean_length": 56.86458396911621, "completions/min_length": 34.75, "epoch": 1.1876396128071482, "grad_norm": 0.007554829678816293, "kl": 0.5322265625, "learning_rate": 9.705700239897807e-07, "loss": 0.0005321508506312966, "memory(GiB)": 112.53, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 797, "train_speed(iter/s)": 0.029446 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 85.25, "completions/mean_length": 59.72916793823242, "completions/min_length": 39.0, "epoch": 1.1891288160833953, "grad_norm": 1.3454420126261164, "kl": 0.4970703125, "learning_rate": 9.70490032229726e-07, "loss": -0.008946424350142479, "memory(GiB)": 112.53, "reward": 1.385416716337204, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.385416679084301, "rewards/CineAccuracyORM/std": 0.45113223791122437, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 798, "train_speed(iter/s)": 0.029423 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.0, "completions/mean_length": 60.67708492279053, "completions/min_length": 36.25, "epoch": 1.1906180193596425, "grad_norm": 1.9544768991260486, "kl": 0.4716796875, "learning_rate": 9.704099352123236e-07, "loss": 0.004544656723737717, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.10661446303129196, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 799, "train_speed(iter/s)": 0.029428 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 93.25, "completions/mean_length": 59.57291793823242, "completions/min_length": 39.25, "epoch": 1.1921072226358898, "grad_norm": 1.7638225898827722, "kl": 0.5166015625, "learning_rate": 9.703297329554929e-07, "loss": 0.009037001058459282, "memory(GiB)": 112.53, "reward": 1.4166666865348816, "reward_std": 0.09750140458345413, "rewards/CineAccuracyORM/mean": 0.4166666865348816, "rewards/CineAccuracyORM/std": 0.34273428469896317, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 800, "train_speed(iter/s)": 0.029416 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 90.75, "completions/mean_length": 59.44791793823242, "completions/min_length": 38.25, "epoch": 1.193596425912137, "grad_norm": 1.0996585635624956, "kl": 0.48388671875, "learning_rate": 9.702494254771767e-07, "loss": 0.0005252610426396132, "memory(GiB)": 112.53, "reward": 1.6979166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.20484384894371033, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 801, "train_speed(iter/s)": 0.02942 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 98.5, "completions/mean_length": 61.687500953674316, "completions/min_length": 38.0, "epoch": 1.1950856291883842, "grad_norm": 0.00703183190913148, "kl": 0.4697265625, "learning_rate": 9.70169012795341e-07, "loss": 0.000469366175821051, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 802, "train_speed(iter/s)": 0.029423 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 89.5, "completions/mean_length": 59.093750953674316, "completions/min_length": 42.0, "epoch": 1.1965748324646315, "grad_norm": 0.007624300771302505, "kl": 0.490234375, "learning_rate": 9.700884949279759e-07, "loss": 0.0004900441854260862, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 803, "train_speed(iter/s)": 0.029428 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 90.25, "completions/mean_length": 59.14583492279053, "completions/min_length": 38.0, "epoch": 1.1980640357408787, "grad_norm": 1.411566526306602, "kl": 0.47802734375, "learning_rate": 9.700078718930945e-07, "loss": 0.0029355348087847233, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 804, "train_speed(iter/s)": 0.029433 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 85.25, "completions/mean_length": 59.000000953674316, "completions/min_length": 39.75, "epoch": 1.199553239017126, "grad_norm": 0.007462923274784577, "kl": 0.48681640625, "learning_rate": 9.699271437087338e-07, "loss": 0.00048585128388367593, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 805, "train_speed(iter/s)": 0.029438 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 92.0, "completions/mean_length": 62.05208396911621, "completions/min_length": 39.5, "epoch": 1.201042442293373, "grad_norm": 2.0207006567951957, "kl": 0.48388671875, "learning_rate": 9.698463103929541e-07, "loss": -0.0027144819032400846, "memory(GiB)": 112.53, "reward": 1.5937500298023224, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.5937500074505806, "rewards/CineAccuracyORM/std": 0.3582116588950157, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 806, "train_speed(iter/s)": 0.029442 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 83.25, "completions/mean_length": 57.40625190734863, "completions/min_length": 41.25, "epoch": 1.2025316455696202, "grad_norm": 1.2670674058829114, "kl": 0.48828125, "learning_rate": 9.697653719638395e-07, "loss": 0.0002872650511562824, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.2950507290661335, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 807, "train_speed(iter/s)": 0.029437 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 88.0, "completions/mean_length": 58.82291793823242, "completions/min_length": 35.75, "epoch": 1.2040208488458675, "grad_norm": 0.9838492552463858, "kl": 0.45947265625, "learning_rate": 9.696843284394974e-07, "loss": -0.0017102001002058387, "memory(GiB)": 112.53, "reward": 1.6562500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500149011612, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 808, "train_speed(iter/s)": 0.029443 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 89.25, "completions/mean_length": 61.18750190734863, "completions/min_length": 40.75, "epoch": 1.2055100521221147, "grad_norm": 0.006511011583474983, "kl": 0.49267578125, "learning_rate": 9.696031798380584e-07, "loss": 0.0004925834946334362, "memory(GiB)": 112.53, "reward": 1.4166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.4166666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 809, "train_speed(iter/s)": 0.029437 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 86.0, "completions/mean_length": 55.83333396911621, "completions/min_length": 39.5, "epoch": 1.206999255398362, "grad_norm": 1.493762157914068, "kl": 0.537109375, "learning_rate": 9.695219261776774e-07, "loss": -0.0002532128419261426, "memory(GiB)": 112.53, "reward": 1.6354167461395264, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6354166865348816, "rewards/CineAccuracyORM/std": 0.4884019047021866, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 810, "train_speed(iter/s)": 0.029436 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 89.0, "completions/mean_length": 57.42708492279053, "completions/min_length": 38.25, "epoch": 1.208488458674609, "grad_norm": 2.2047148162808727, "kl": 0.486328125, "learning_rate": 9.694405674765317e-07, "loss": 0.016068683937191963, "memory(GiB)": 112.53, "reward": 1.4583333432674408, "reward_std": 0.09750140458345413, "rewards/CineAccuracyORM/mean": 0.4583333358168602, "rewards/CineAccuracyORM/std": 0.47823499888181686, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 811, "train_speed(iter/s)": 0.02943 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 87.5, "completions/mean_length": 58.50000190734863, "completions/min_length": 39.25, "epoch": 1.2099776619508562, "grad_norm": 0.006872352419815186, "kl": 0.50048828125, "learning_rate": 9.693591037528238e-07, "loss": 0.0005007315194234252, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 812, "train_speed(iter/s)": 0.029445 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 93.0, "completions/mean_length": 59.73958396911621, "completions/min_length": 40.25, "epoch": 1.2114668652271035, "grad_norm": 2.6756835890292554, "kl": 0.51611328125, "learning_rate": 9.692775350247777e-07, "loss": 0.0033039662521332502, "memory(GiB)": 112.53, "reward": 1.5520833432674408, "reward_std": 0.1602645255625248, "rewards/CineAccuracyORM/mean": 0.5520833358168602, "rewards/CineAccuracyORM/std": 0.3590897470712662, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 813, "train_speed(iter/s)": 0.02945 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 84.25, "completions/mean_length": 60.29166793823242, "completions/min_length": 42.75, "epoch": 1.2129560685033507, "grad_norm": 1.5599489979393149, "kl": 0.47216796875, "learning_rate": 9.691958613106422e-07, "loss": 0.002333692042157054, "memory(GiB)": 112.53, "reward": 1.5208333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.5208333432674408, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 814, "train_speed(iter/s)": 0.029465 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 85.25, "completions/mean_length": 60.27083396911621, "completions/min_length": 38.25, "epoch": 1.214445271779598, "grad_norm": 1.884134123533761, "kl": 0.4765625, "learning_rate": 9.691140826286893e-07, "loss": -0.018251538276672363, "memory(GiB)": 112.53, "reward": 1.760416716337204, "reward_std": 0.14063050225377083, "rewards/CineAccuracyORM/mean": 0.7604166865348816, "rewards/CineAccuracyORM/std": 0.3987635113298893, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 815, "train_speed(iter/s)": 0.02947 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 76.5, "completions/mean_length": 56.42708492279053, "completions/min_length": 39.75, "epoch": 1.2159344750558452, "grad_norm": 0.906258850842306, "kl": 0.50244140625, "learning_rate": 9.690321989972143e-07, "loss": 0.0005192921962589025, "memory(GiB)": 112.53, "reward": 1.4583333730697632, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.4583333432674408, "rewards/CineAccuracyORM/std": 0.36846019327640533, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 816, "train_speed(iter/s)": 0.029468 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 88.0, "completions/mean_length": 58.85416889190674, "completions/min_length": 39.75, "epoch": 1.2174236783320924, "grad_norm": 1.8615333223003496, "kl": 0.4853515625, "learning_rate": 9.68950210434536e-07, "loss": -0.006907378323376179, "memory(GiB)": 112.53, "reward": 1.4895834028720856, "reward_std": 0.0876726359128952, "rewards/CineAccuracyORM/mean": 0.4895833507180214, "rewards/CineAccuracyORM/std": 0.47723397612571716, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 817, "train_speed(iter/s)": 0.029484 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 82.75, "completions/mean_length": 56.468750953674316, "completions/min_length": 35.25, "epoch": 1.2189128816083397, "grad_norm": 1.3067156253809942, "kl": 0.4990234375, "learning_rate": 9.688681169589968e-07, "loss": -0.007869548164308071, "memory(GiB)": 112.53, "reward": 1.7708333432674408, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.2397102490067482, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 818, "train_speed(iter/s)": 0.029495 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 88.75, "completions/mean_length": 59.80208492279053, "completions/min_length": 39.25, "epoch": 1.2204020848845867, "grad_norm": 1.0472199098667214, "kl": 0.48583984375, "learning_rate": 9.687859185889629e-07, "loss": -0.0008009732700884342, "memory(GiB)": 112.53, "reward": 1.6875000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000074505806, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 819, "train_speed(iter/s)": 0.02949 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 89.0, "completions/mean_length": 60.89583396911621, "completions/min_length": 38.75, "epoch": 1.221891288160834, "grad_norm": 1.085128689593234, "kl": 0.4921875, "learning_rate": 9.68703615342823e-07, "loss": 0.0004762449534609914, "memory(GiB)": 112.53, "reward": 1.5312500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.5312500149011612, "rewards/CineAccuracyORM/std": 0.464870348572731, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 820, "train_speed(iter/s)": 0.029495 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 96.25, "completions/mean_length": 62.07291793823242, "completions/min_length": 44.0, "epoch": 1.2233804914370812, "grad_norm": 1.612772804831542, "kl": 0.494140625, "learning_rate": 9.686212072389902e-07, "loss": -0.0036195178981870413, "memory(GiB)": 112.53, "reward": 1.9479166865348816, "reward_std": 0.07400632463395596, "rewards/CineAccuracyORM/mean": 0.9479166865348816, "rewards/CineAccuracyORM/std": 0.1462044082581997, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 821, "train_speed(iter/s)": 0.029499 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 94.0, "completions/mean_length": 58.81250190734863, "completions/min_length": 36.75, "epoch": 1.2248696947133284, "grad_norm": 0.8914941919510005, "kl": 0.52001953125, "learning_rate": 9.685386942959009e-07, "loss": -0.0021040942519903183, "memory(GiB)": 112.53, "reward": 1.5937500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500149011612, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 822, "train_speed(iter/s)": 0.029491 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 82.0, "completions/mean_length": 59.68750190734863, "completions/min_length": 38.25, "epoch": 1.2263588979895756, "grad_norm": 1.9695374625469626, "kl": 0.4794921875, "learning_rate": 9.684560765320142e-07, "loss": 0.013264557346701622, "memory(GiB)": 112.53, "reward": 1.572916716337204, "reward_std": 0.1588566116988659, "rewards/CineAccuracyORM/mean": 0.572916679084301, "rewards/CineAccuracyORM/std": 0.43498801440000534, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 823, "train_speed(iter/s)": 0.029485 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 84.75, "completions/mean_length": 58.68750190734863, "completions/min_length": 37.75, "epoch": 1.2278481012658227, "grad_norm": 0.9189580893072182, "kl": 0.4697265625, "learning_rate": 9.683733539658138e-07, "loss": 0.00920151174068451, "memory(GiB)": 112.53, "reward": 1.6875000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000074505806, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 824, "train_speed(iter/s)": 0.029499 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 97.25, "completions/mean_length": 60.875, "completions/min_length": 37.5, "epoch": 1.22933730454207, "grad_norm": 1.3561918099362813, "kl": 0.505859375, "learning_rate": 9.682905266158061e-07, "loss": 0.004439661744982004, "memory(GiB)": 112.53, "reward": 1.6354166865348816, "reward_std": 0.07400632463395596, "rewards/CineAccuracyORM/mean": 0.635416679084301, "rewards/CineAccuracyORM/std": 0.33919306844472885, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 825, "train_speed(iter/s)": 0.029503 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 96.25, "completions/mean_length": 59.19791793823242, "completions/min_length": 41.0, "epoch": 1.2308265078183172, "grad_norm": 0.0074990508411968615, "kl": 0.53515625, "learning_rate": 9.68207594500521e-07, "loss": 0.0005354271270334721, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 826, "train_speed(iter/s)": 0.02951 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 86.5, "completions/mean_length": 57.67708396911621, "completions/min_length": 38.0, "epoch": 1.2323157110945644, "grad_norm": 0.007069788502302422, "kl": 0.4970703125, "learning_rate": 9.68124557638512e-07, "loss": 0.0004970317240804434, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 827, "train_speed(iter/s)": 0.029516 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 99.0, "completions/mean_length": 61.45833492279053, "completions/min_length": 36.75, "epoch": 1.2338049143708116, "grad_norm": 0.0066295179558966836, "kl": 0.5029296875, "learning_rate": 9.68041416048356e-07, "loss": 0.0005020577227696776, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 828, "train_speed(iter/s)": 0.02949 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 81.75, "completions/mean_length": 59.77083396911621, "completions/min_length": 36.5, "epoch": 1.2352941176470589, "grad_norm": 0.8548370833046874, "kl": 0.47314453125, "learning_rate": 9.679581697486533e-07, "loss": 0.000800657260697335, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 829, "train_speed(iter/s)": 0.029505 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 84.5, "completions/mean_length": 59.718750953674316, "completions/min_length": 34.75, "epoch": 1.2367833209233061, "grad_norm": 0.0070635654870245795, "kl": 0.521484375, "learning_rate": 9.678748187580278e-07, "loss": 0.0005218042642809451, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 830, "train_speed(iter/s)": 0.029501 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 92.5, "completions/mean_length": 59.05208492279053, "completions/min_length": 38.75, "epoch": 1.2382725241995534, "grad_norm": 1.4040863428198116, "kl": 0.50439453125, "learning_rate": 9.677913630951264e-07, "loss": 0.0005039925454184413, "memory(GiB)": 112.53, "reward": 1.3958333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.3958333507180214, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 831, "train_speed(iter/s)": 0.029515 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 88.5, "completions/mean_length": 62.23958396911621, "completions/min_length": 38.25, "epoch": 1.2397617274758004, "grad_norm": 1.8034750525534962, "kl": 0.4794921875, "learning_rate": 9.677078027786198e-07, "loss": 0.009783722460269928, "memory(GiB)": 112.53, "reward": 1.6562500596046448, "reward_std": 0.1465982049703598, "rewards/CineAccuracyORM/mean": 0.6562500149011612, "rewards/CineAccuracyORM/std": 0.47217297554016113, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 832, "train_speed(iter/s)": 0.029509 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 98.0, "completions/mean_length": 61.38541793823242, "completions/min_length": 41.5, "epoch": 1.2412509307520476, "grad_norm": 1.4415424444276812, "kl": 0.5009765625, "learning_rate": 9.67624137827202e-07, "loss": 0.016373727470636368, "memory(GiB)": 112.53, "reward": 1.5937500298023224, "reward_std": 0.10661446116864681, "rewards/CineAccuracyORM/mean": 0.5937500149011612, "rewards/CineAccuracyORM/std": 0.4805036932229996, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 833, "train_speed(iter/s)": 0.029521 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 95.0, "completions/mean_length": 60.85416793823242, "completions/min_length": 40.5, "epoch": 1.2427401340282949, "grad_norm": 0.9889754958337581, "kl": 0.5068359375, "learning_rate": 9.675403682595906e-07, "loss": -0.0011441169772297144, "memory(GiB)": 112.53, "reward": 1.5937500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500223517418, "rewards/CineAccuracyORM/std": 0.47723397612571716, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 834, "train_speed(iter/s)": 0.029518 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 94.5, "completions/mean_length": 60.43750190734863, "completions/min_length": 38.0, "epoch": 1.244229337304542, "grad_norm": 0.7798396171061012, "kl": 0.5078125, "learning_rate": 9.67456494094526e-07, "loss": -0.004259665496647358, "memory(GiB)": 112.53, "reward": 1.5208334028720856, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.5208333507180214, "rewards/CineAccuracyORM/std": 0.4870600998401642, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 835, "train_speed(iter/s)": 0.029522 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 82.0, "completions/mean_length": 59.58333396911621, "completions/min_length": 40.0, "epoch": 1.2457185405807893, "grad_norm": 0.0074960893230567455, "kl": 0.50390625, "learning_rate": 9.673725153507726e-07, "loss": 0.0005045407451689243, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 836, "train_speed(iter/s)": 0.029527 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 101.25, "completions/mean_length": 60.54166889190674, "completions/min_length": 39.0, "epoch": 1.2472077438570364, "grad_norm": 0.00699412488809394, "kl": 0.5078125, "learning_rate": 9.672884320471181e-07, "loss": 0.0005082315765321255, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 837, "train_speed(iter/s)": 0.029541 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 87.0, "completions/mean_length": 59.80208492279053, "completions/min_length": 39.5, "epoch": 1.2486969471332836, "grad_norm": 1.852812638701584, "kl": 0.50927734375, "learning_rate": 9.672042442023733e-07, "loss": 0.005082267336547375, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.1420449446886778, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.2397102490067482, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 838, "train_speed(iter/s)": 0.029536 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 92.25, "completions/mean_length": 59.43750190734863, "completions/min_length": 39.75, "epoch": 1.2501861504095308, "grad_norm": 1.689661863577538, "kl": 0.49462890625, "learning_rate": 9.671199518353728e-07, "loss": -2.158790266548749e-05, "memory(GiB)": 112.53, "reward": 1.541666716337204, "reward_std": 0.07715167850255966, "rewards/CineAccuracyORM/mean": 0.541666679084301, "rewards/CineAccuracyORM/std": 0.4772557094693184, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 839, "train_speed(iter/s)": 0.029541 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 83.5, "completions/mean_length": 58.44791793823242, "completions/min_length": 36.5, "epoch": 1.251675353685778, "grad_norm": 1.8427540632435475, "kl": 0.50048828125, "learning_rate": 9.670355549649743e-07, "loss": 7.43257551221177e-05, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.14063050411641598, "rewards/CineAccuracyORM/mean": 0.739583358168602, "rewards/CineAccuracyORM/std": 0.362364798784256, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 840, "train_speed(iter/s)": 0.029527 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 85.75, "completions/mean_length": 59.30208492279053, "completions/min_length": 40.75, "epoch": 1.2531645569620253, "grad_norm": 1.6940408447847304, "kl": 0.51123046875, "learning_rate": 9.66951053610059e-07, "loss": 0.002777865156531334, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.08625819534063339, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.4289424866437912, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 841, "train_speed(iter/s)": 0.029532 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 87.5, "completions/mean_length": 60.63541793823242, "completions/min_length": 41.5, "epoch": 1.2546537602382726, "grad_norm": 0.7503293422161975, "kl": 0.47021484375, "learning_rate": 9.66866447789531e-07, "loss": 0.004798226989805698, "memory(GiB)": 112.53, "reward": 1.729166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.729166679084301, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 842, "train_speed(iter/s)": 0.029527 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 96.25, "completions/mean_length": 60.593750953674316, "completions/min_length": 40.75, "epoch": 1.2561429635145198, "grad_norm": 0.8320433111197693, "kl": 0.490234375, "learning_rate": 9.667817375223188e-07, "loss": -0.00127372145652771, "memory(GiB)": 112.53, "reward": 1.6979166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 843, "train_speed(iter/s)": 0.029525 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 97.25, "completions/mean_length": 60.677085876464844, "completions/min_length": 40.25, "epoch": 1.257632166790767, "grad_norm": 1.3690783557716506, "kl": 0.4970703125, "learning_rate": 9.666969228273732e-07, "loss": 0.003760312683880329, "memory(GiB)": 112.53, "reward": 1.7812500298023224, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.7812500149011612, "rewards/CineAccuracyORM/std": 0.2419789433479309, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 844, "train_speed(iter/s)": 0.029528 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 91.25, "completions/mean_length": 58.19791793823242, "completions/min_length": 36.0, "epoch": 1.259121370067014, "grad_norm": 1.5727779229948449, "kl": 0.5322265625, "learning_rate": 9.66612003723669e-07, "loss": 0.009369594976305962, "memory(GiB)": 112.53, "reward": 1.7187500298023224, "reward_std": 0.11258216947317123, "rewards/CineAccuracyORM/mean": 0.7187500149011612, "rewards/CineAccuracyORM/std": 0.3641507476568222, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 845, "train_speed(iter/s)": 0.029515 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 81.0, "completions/mean_length": 56.625000953674316, "completions/min_length": 38.5, "epoch": 1.2606105733432613, "grad_norm": 0.0067947715072859995, "kl": 0.5009765625, "learning_rate": 9.665269802302043e-07, "loss": 0.0005010481690987945, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 846, "train_speed(iter/s)": 0.02953 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 84.5, "completions/mean_length": 59.35416889190674, "completions/min_length": 41.75, "epoch": 1.2620997766195086, "grad_norm": 1.2380108253163495, "kl": 0.49267578125, "learning_rate": 9.664418523660004e-07, "loss": 0.002826204989105463, "memory(GiB)": 112.53, "reward": 1.6770833432674408, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.6770833358168602, "rewards/CineAccuracyORM/std": 0.3546011596918106, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 847, "train_speed(iter/s)": 0.029536 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 92.75, "completions/mean_length": 60.60416793823242, "completions/min_length": 39.0, "epoch": 1.2635889798957558, "grad_norm": 1.5580968976097078, "kl": 0.498046875, "learning_rate": 9.663566201501016e-07, "loss": -0.00015179379261098802, "memory(GiB)": 112.53, "reward": 1.7187500298023224, "reward_std": 0.11572097800672054, "rewards/CineAccuracyORM/mean": 0.7187500149011612, "rewards/CineAccuracyORM/std": 0.3699222281575203, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 848, "train_speed(iter/s)": 0.02954 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 93.5, "completions/mean_length": 57.833335876464844, "completions/min_length": 38.75, "epoch": 1.265078183172003, "grad_norm": 0.008545131720951704, "kl": 0.49658203125, "learning_rate": 9.662712836015762e-07, "loss": 0.0004957329947501421, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 849, "train_speed(iter/s)": 0.029555 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 93.25, "completions/mean_length": 59.90625190734863, "completions/min_length": 39.5, "epoch": 1.26656738644825, "grad_norm": 2.1697902295405704, "kl": 0.486328125, "learning_rate": 9.661858427395157e-07, "loss": 0.004823820199817419, "memory(GiB)": 112.53, "reward": 1.6562500596046448, "reward_std": 0.11572097800672054, "rewards/CineAccuracyORM/mean": 0.6562500223517418, "rewards/CineAccuracyORM/std": 0.4438878297805786, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 850, "train_speed(iter/s)": 0.02956 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 95.75, "completions/mean_length": 61.38541889190674, "completions/min_length": 39.75, "epoch": 1.2680565897244973, "grad_norm": 2.0526184198692903, "kl": 0.4951171875, "learning_rate": 9.661002975830349e-07, "loss": 0.012825626879930496, "memory(GiB)": 112.53, "reward": 1.5416666865348816, "reward_std": 0.15571125783026218, "rewards/CineAccuracyORM/mean": 0.541666679084301, "rewards/CineAccuracyORM/std": 0.45438022166490555, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 851, "train_speed(iter/s)": 0.029554 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 90.25, "completions/mean_length": 57.41666793823242, "completions/min_length": 36.0, "epoch": 1.2695457930007445, "grad_norm": 0.008271711130874309, "kl": 0.53515625, "learning_rate": 9.660146481512713e-07, "loss": 0.0005351880099624395, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 852, "train_speed(iter/s)": 0.029549 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.5, "completions/mean_length": 63.25000190734863, "completions/min_length": 36.0, "epoch": 1.2710349962769918, "grad_norm": 1.3028959745176691, "kl": 0.49072265625, "learning_rate": 9.659288944633867e-07, "loss": 0.02243204414844513, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.3676535338163376, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 853, "train_speed(iter/s)": 0.029552 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 93.5, "completions/mean_length": 60.35416889190674, "completions/min_length": 40.25, "epoch": 1.272524199553239, "grad_norm": 1.7987396416901107, "kl": 0.486328125, "learning_rate": 9.65843036538566e-07, "loss": -0.011303924024105072, "memory(GiB)": 112.53, "reward": 1.447916716337204, "reward_std": 0.12696418724954128, "rewards/CineAccuracyORM/mean": 0.447916679084301, "rewards/CineAccuracyORM/std": 0.49030808359384537, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 854, "train_speed(iter/s)": 0.029548 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 90.5, "completions/mean_length": 58.86458492279053, "completions/min_length": 37.25, "epoch": 1.2740134028294863, "grad_norm": 1.7678617921062265, "kl": 0.52978515625, "learning_rate": 9.657570743960163e-07, "loss": -0.01673193648457527, "memory(GiB)": 112.53, "reward": 1.4687500298023224, "reward_std": 0.102054663002491, "rewards/CineAccuracyORM/mean": 0.4687500223517418, "rewards/CineAccuracyORM/std": 0.4369966685771942, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 855, "train_speed(iter/s)": 0.029544 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 90.0, "completions/mean_length": 60.71875190734863, "completions/min_length": 39.0, "epoch": 1.2755026061057335, "grad_norm": 1.327398095876221, "kl": 0.5087890625, "learning_rate": 9.6567100805497e-07, "loss": 0.007787506096065044, "memory(GiB)": 112.53, "reward": 1.5937500596046448, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.5937500223517418, "rewards/CineAccuracyORM/std": 0.47723397612571716, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 856, "train_speed(iter/s)": 0.029544 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 87.5, "completions/mean_length": 56.96875, "completions/min_length": 36.75, "epoch": 1.2769918093819808, "grad_norm": 0.7723143192424085, "kl": 0.51611328125, "learning_rate": 9.655848375346811e-07, "loss": -0.000769544392824173, "memory(GiB)": 112.53, "reward": 1.729166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7291666865348816, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 857, "train_speed(iter/s)": 0.029555 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.0, "completions/mean_length": 60.41666793823242, "completions/min_length": 34.5, "epoch": 1.2784810126582278, "grad_norm": 0.00809330989128054, "kl": 0.5, "learning_rate": 9.654985628544277e-07, "loss": 0.0004990893648937345, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 858, "train_speed(iter/s)": 0.029549 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 87.75, "completions/mean_length": 56.92708492279053, "completions/min_length": 34.75, "epoch": 1.279970215934475, "grad_norm": 0.9946142889828393, "kl": 0.4755859375, "learning_rate": 9.65412184033511e-07, "loss": 0.008927593007683754, "memory(GiB)": 112.53, "reward": 1.7812500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7812500074505806, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 859, "train_speed(iter/s)": 0.029564 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 95.0, "completions/mean_length": 58.76041793823242, "completions/min_length": 38.25, "epoch": 1.2814594192107223, "grad_norm": 0.8774903661050637, "kl": 0.529296875, "learning_rate": 9.653257010912558e-07, "loss": 0.011377725750207901, "memory(GiB)": 112.53, "reward": 1.791666716337204, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7916666865348816, "rewards/CineAccuracyORM/std": 0.33594508469104767, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 860, "train_speed(iter/s)": 0.029558 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 98.75, "completions/mean_length": 57.125000953674316, "completions/min_length": 35.0, "epoch": 1.2829486224869695, "grad_norm": 0.0070770552878196375, "kl": 0.53515625, "learning_rate": 9.652391140470096e-07, "loss": 0.000535090861376375, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 861, "train_speed(iter/s)": 0.02955 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 90.0, "completions/mean_length": 59.11458396911621, "completions/min_length": 37.75, "epoch": 1.2844378257632167, "grad_norm": 1.8446588888519604, "kl": 0.49609375, "learning_rate": 9.651524229201436e-07, "loss": 0.003900612471625209, "memory(GiB)": 112.53, "reward": 1.4375000596046448, "reward_std": 0.1451837606728077, "rewards/CineAccuracyORM/mean": 0.4375000176951289, "rewards/CineAccuracyORM/std": 0.42159612104296684, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 862, "train_speed(iter/s)": 0.029558 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 98.5, "completions/mean_length": 60.05208396911621, "completions/min_length": 35.75, "epoch": 1.2859270290394638, "grad_norm": 1.94804654147838, "kl": 0.484375, "learning_rate": 9.650656277300524e-07, "loss": 0.0045200674794614315, "memory(GiB)": 112.53, "reward": 1.760416716337204, "reward_std": 0.15429682284593582, "rewards/CineAccuracyORM/mean": 0.7604166865348816, "rewards/CineAccuracyORM/std": 0.34280356764793396, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 863, "train_speed(iter/s)": 0.029554 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 87.5, "completions/mean_length": 57.65625190734863, "completions/min_length": 35.5, "epoch": 1.287416232315711, "grad_norm": 0.9800705832123583, "kl": 0.490234375, "learning_rate": 9.649787284961536e-07, "loss": 0.013345537707209587, "memory(GiB)": 112.53, "reward": 1.5208333730697632, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.520833358168602, "rewards/CineAccuracyORM/std": 0.47292453050613403, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 864, "train_speed(iter/s)": 0.029568 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 93.75, "completions/mean_length": 57.500000953674316, "completions/min_length": 36.0, "epoch": 1.2889054355919582, "grad_norm": 1.040010744323784, "kl": 0.4873046875, "learning_rate": 9.64891725237888e-07, "loss": -0.012028342112898827, "memory(GiB)": 112.53, "reward": 1.760416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166865348816, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 865, "train_speed(iter/s)": 0.029573 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 84.25, "completions/mean_length": 60.38541793823242, "completions/min_length": 37.25, "epoch": 1.2903946388682055, "grad_norm": 1.702765762038138, "kl": 0.4853515625, "learning_rate": 9.6480461797472e-07, "loss": 0.00029345229268074036, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.15429682284593582, "rewards/CineAccuracyORM/mean": 0.8020833507180214, "rewards/CineAccuracyORM/std": 0.2930001765489578, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 866, "train_speed(iter/s)": 0.029587 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 88.0, "completions/mean_length": 57.79166793823242, "completions/min_length": 34.75, "epoch": 1.2918838421444527, "grad_norm": 1.7368123553041002, "kl": 0.50732421875, "learning_rate": 9.64717406726137e-07, "loss": -0.003746192902326584, "memory(GiB)": 112.53, "reward": 1.6875000298023224, "reward_std": 0.1178511306643486, "rewards/CineAccuracyORM/mean": 0.6875000149011612, "rewards/CineAccuracyORM/std": 0.3005674071609974, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 867, "train_speed(iter/s)": 0.029586 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 99.5, "completions/mean_length": 58.85416793823242, "completions/min_length": 35.0, "epoch": 1.2933730454207, "grad_norm": 1.305964919174854, "kl": 0.4912109375, "learning_rate": 9.6463009151165e-07, "loss": -0.003628810402005911, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.07715167850255966, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4772557094693184, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 868, "train_speed(iter/s)": 0.0296 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 89.5, "completions/mean_length": 60.531250953674316, "completions/min_length": 37.5, "epoch": 1.2948622486969472, "grad_norm": 1.6944421416662243, "kl": 0.48291015625, "learning_rate": 9.645426723507928e-07, "loss": 0.0030111856758594513, "memory(GiB)": 112.53, "reward": 1.5520833730697632, "reward_std": 0.10205466859042645, "rewards/CineAccuracyORM/mean": 0.552083358168602, "rewards/CineAccuracyORM/std": 0.4177052788436413, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 869, "train_speed(iter/s)": 0.029596 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 98.0, "completions/mean_length": 61.61458492279053, "completions/min_length": 38.75, "epoch": 1.2963514519731945, "grad_norm": 0.9018182756989269, "kl": 0.5009765625, "learning_rate": 9.64455149263123e-07, "loss": 0.009068870916962624, "memory(GiB)": 112.53, "reward": 1.4270833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.4270833386108279, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 870, "train_speed(iter/s)": 0.029589 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 95.75, "completions/mean_length": 61.96875190734863, "completions/min_length": 34.5, "epoch": 1.2978406552494415, "grad_norm": 1.1236333958065572, "kl": 0.48193359375, "learning_rate": 9.643675222682208e-07, "loss": 0.006044579204171896, "memory(GiB)": 112.53, "reward": 1.6875000298023224, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.6875000074505806, "rewards/CineAccuracyORM/std": 0.3477324768900871, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 871, "train_speed(iter/s)": 0.029588 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 87.5, "completions/mean_length": 56.61458396911621, "completions/min_length": 32.0, "epoch": 1.2993298585256887, "grad_norm": 0.007073043708387499, "kl": 0.509765625, "learning_rate": 9.6427979138569e-07, "loss": 0.0005092034116387367, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 872, "train_speed(iter/s)": 0.029592 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 96.5, "completions/mean_length": 62.531250953674316, "completions/min_length": 37.75, "epoch": 1.300819061801936, "grad_norm": 0.00796002700994191, "kl": 0.49072265625, "learning_rate": 9.641919566351579e-07, "loss": 0.0004896769532933831, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 873, "train_speed(iter/s)": 0.029586 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 93.0, "completions/mean_length": 57.10416793823242, "completions/min_length": 38.75, "epoch": 1.3023082650781832, "grad_norm": 1.338778157647753, "kl": 0.51171875, "learning_rate": 9.641040180362746e-07, "loss": 0.005603370722383261, "memory(GiB)": 112.53, "reward": 1.447916716337204, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.44791668467223644, "rewards/CineAccuracyORM/std": 0.42743058502674103, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 874, "train_speed(iter/s)": 0.029591 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 101.5, "completions/mean_length": 64.09375286102295, "completions/min_length": 41.5, "epoch": 1.3037974683544304, "grad_norm": 1.2484215558901839, "kl": 0.46533203125, "learning_rate": 9.640159756087135e-07, "loss": -0.003917091526091099, "memory(GiB)": 112.53, "reward": 1.6979167461395264, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.6979166865348816, "rewards/CineAccuracyORM/std": 0.43859851360321045, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 875, "train_speed(iter/s)": 0.029587 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 95.0, "completions/mean_length": 58.80208492279053, "completions/min_length": 36.75, "epoch": 1.3052866716306775, "grad_norm": 2.40260676677143, "kl": 0.45703125, "learning_rate": 9.639278293721713e-07, "loss": -0.011257599107921124, "memory(GiB)": 112.53, "reward": 1.6250000298023224, "reward_std": 0.14518376626074314, "rewards/CineAccuracyORM/mean": 0.6250000149011612, "rewards/CineAccuracyORM/std": 0.26532528176903725, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 876, "train_speed(iter/s)": 0.0296 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 94.75, "completions/mean_length": 61.80208396911621, "completions/min_length": 38.0, "epoch": 1.3067758749069247, "grad_norm": 1.7477409096044263, "kl": 0.46435546875, "learning_rate": 9.638395793463684e-07, "loss": 0.004900164902210236, "memory(GiB)": 112.53, "reward": 1.7187500298023224, "reward_std": 0.07400632463395596, "rewards/CineAccuracyORM/mean": 0.7187500223517418, "rewards/CineAccuracyORM/std": 0.33163563907146454, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 877, "train_speed(iter/s)": 0.029595 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 105.25, "completions/mean_length": 58.06250190734863, "completions/min_length": 35.5, "epoch": 1.308265078183172, "grad_norm": 1.011570476622817, "kl": 0.50048828125, "learning_rate": 9.637512255510474e-07, "loss": 0.00886605866253376, "memory(GiB)": 112.53, "reward": 1.7187500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500149011612, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 878, "train_speed(iter/s)": 0.0296 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 106.0, "completions/mean_length": 60.76041793823242, "completions/min_length": 38.5, "epoch": 1.3097542814594192, "grad_norm": 1.2278325773746472, "kl": 0.49462890625, "learning_rate": 9.636627680059748e-07, "loss": -0.002547315089032054, "memory(GiB)": 112.53, "reward": 1.8750000298023224, "reward_std": 0.0725918859243393, "rewards/CineAccuracyORM/mean": 0.8750000149011612, "rewards/CineAccuracyORM/std": 0.2155592292547226, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 879, "train_speed(iter/s)": 0.029596 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 99.0, "completions/mean_length": 62.61458492279053, "completions/min_length": 38.0, "epoch": 1.3112434847356664, "grad_norm": 1.0858491875772742, "kl": 0.45458984375, "learning_rate": 9.635742067309405e-07, "loss": 0.0021305351983755827, "memory(GiB)": 112.53, "reward": 1.291666716337204, "reward_std": 0.07715167850255966, "rewards/CineAccuracyORM/mean": 0.29166667722165585, "rewards/CineAccuracyORM/std": 0.316870853304863, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 880, "train_speed(iter/s)": 0.029597 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 96.75, "completions/mean_length": 61.77083492279053, "completions/min_length": 37.75, "epoch": 1.3127326880119137, "grad_norm": 1.8376150984624529, "kl": 0.4716796875, "learning_rate": 9.63485541745757e-07, "loss": 0.004146529361605644, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.12028077617287636, "rewards/CineAccuracyORM/mean": 0.6770833432674408, "rewards/CineAccuracyORM/std": 0.3362630605697632, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 881, "train_speed(iter/s)": 0.0296 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 101.0, "completions/mean_length": 60.92708396911621, "completions/min_length": 38.75, "epoch": 1.314221891288161, "grad_norm": 2.3631545463288464, "kl": 0.47900390625, "learning_rate": 9.633967730702605e-07, "loss": -0.0035684274043887854, "memory(GiB)": 112.53, "reward": 1.8229167461395264, "reward_std": 0.17920634150505066, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.36526356637477875, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 882, "train_speed(iter/s)": 0.029595 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.25, "completions/mean_length": 65.59375381469727, "completions/min_length": 37.75, "epoch": 1.3157110945644082, "grad_norm": 0.00547682398184287, "kl": 0.43212890625, "learning_rate": 9.633079007243102e-07, "loss": 0.00043141236528754234, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 883, "train_speed(iter/s)": 0.02959 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 87.5, "completions/mean_length": 59.94791793823242, "completions/min_length": 37.0, "epoch": 1.3172002978406552, "grad_norm": 1.3045238963113743, "kl": 0.50341796875, "learning_rate": 9.632189247277883e-07, "loss": 0.0005035992944613099, "memory(GiB)": 112.53, "reward": 1.6875000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000149011612, "rewards/CineAccuracyORM/std": 0.4717390313744545, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 884, "train_speed(iter/s)": 0.029586 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 97.0, "completions/mean_length": 61.08333492279053, "completions/min_length": 37.75, "epoch": 1.3186895011169024, "grad_norm": 1.3471107448569979, "kl": 0.4990234375, "learning_rate": 9.631298451006005e-07, "loss": -0.01100851595401764, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.08838834799826145, "rewards/CineAccuracyORM/mean": 0.802083358168602, "rewards/CineAccuracyORM/std": 0.34608176350593567, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 885, "train_speed(iter/s)": 0.029571 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.0, "completions/mean_length": 61.66666793823242, "completions/min_length": 38.25, "epoch": 1.3201787043931497, "grad_norm": 0.006065922104809623, "kl": 0.470703125, "learning_rate": 9.630406618626757e-07, "loss": 0.0004704640887212008, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 886, "train_speed(iter/s)": 0.029559 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 89.5, "completions/mean_length": 61.44791889190674, "completions/min_length": 40.25, "epoch": 1.321667907669397, "grad_norm": 0.9926712099288592, "kl": 0.4794921875, "learning_rate": 9.629513750339654e-07, "loss": -0.0005662076291628182, "memory(GiB)": 112.53, "reward": 1.6875000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000149011612, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 887, "train_speed(iter/s)": 0.029551 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.0, "completions/mean_length": 64.30208492279053, "completions/min_length": 39.75, "epoch": 1.3231571109456441, "grad_norm": 0.006526290597794877, "kl": 0.44189453125, "learning_rate": 9.628619846344453e-07, "loss": 0.00044139925739727914, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 888, "train_speed(iter/s)": 0.029526 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 106.25, "completions/mean_length": 62.68750190734863, "completions/min_length": 40.0, "epoch": 1.3246463142218912, "grad_norm": 1.5077226256525678, "kl": 0.48828125, "learning_rate": 9.627724906841132e-07, "loss": -0.0009505513007752597, "memory(GiB)": 112.53, "reward": 1.635416716337204, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.6354166939854622, "rewards/CineAccuracyORM/std": 0.4177052788436413, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 889, "train_speed(iter/s)": 0.029529 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 91.0, "completions/mean_length": 63.23958396911621, "completions/min_length": 40.0, "epoch": 1.3261355174981384, "grad_norm": 1.18926980545702, "kl": 0.4423828125, "learning_rate": 9.626828932029906e-07, "loss": -0.007237916346639395, "memory(GiB)": 112.53, "reward": 1.6562500596046448, "reward_std": 0.06803862750530243, "rewards/CineAccuracyORM/mean": 0.6562500149011612, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 890, "train_speed(iter/s)": 0.029534 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 83.0, "completions/mean_length": 59.56250286102295, "completions/min_length": 43.25, "epoch": 1.3276247207743856, "grad_norm": 2.821173622124549, "kl": 0.46630859375, "learning_rate": 9.625931922111225e-07, "loss": 0.003627695143222809, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.20693820342421532, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.47066736966371536, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 891, "train_speed(iter/s)": 0.029538 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.75, "completions/mean_length": 62.52083396911621, "completions/min_length": 40.25, "epoch": 1.3291139240506329, "grad_norm": 1.3286923448655936, "kl": 0.4755859375, "learning_rate": 9.625033877285767e-07, "loss": -0.0007523050298914313, "memory(GiB)": 112.53, "reward": 1.6458333730697632, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.6562500074505806, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 0.9895833432674408, "rewards/Format/std": 0.05103103443980217, "step": 892, "train_speed(iter/s)": 0.029541 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 93.5, "completions/mean_length": 57.87500190734863, "completions/min_length": 35.75, "epoch": 1.3306031273268801, "grad_norm": 2.0303692959371427, "kl": 0.50927734375, "learning_rate": 9.624134797754436e-07, "loss": -0.010153013281524181, "memory(GiB)": 112.53, "reward": 1.6979166865348816, "reward_std": 0.1571257058531046, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.3546011596918106, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 893, "train_speed(iter/s)": 0.029546 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 95.0, "completions/mean_length": 64.75000190734863, "completions/min_length": 37.0, "epoch": 1.3320923306031274, "grad_norm": 1.5760235023521043, "kl": 0.44921875, "learning_rate": 9.623234683718376e-07, "loss": 0.005164578557014465, "memory(GiB)": 112.53, "reward": 1.7604166865348816, "reward_std": 0.102054663002491, "rewards/CineAccuracyORM/mean": 0.760416679084301, "rewards/CineAccuracyORM/std": 0.3019092120230198, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 894, "train_speed(iter/s)": 0.029549 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 102.25, "completions/mean_length": 59.79166889190674, "completions/min_length": 39.25, "epoch": 1.3335815338793746, "grad_norm": 1.892342018317421, "kl": 0.4775390625, "learning_rate": 9.622333535378958e-07, "loss": 0.0360831543803215, "memory(GiB)": 112.53, "reward": 1.635416716337204, "reward_std": 0.15115800313651562, "rewards/CineAccuracyORM/mean": 0.6354166939854622, "rewards/CineAccuracyORM/std": 0.4520214945077896, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 895, "train_speed(iter/s)": 0.029554 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 101.0, "completions/mean_length": 65.50000190734863, "completions/min_length": 42.75, "epoch": 1.3350707371556219, "grad_norm": 2.155887418317522, "kl": 0.44677734375, "learning_rate": 9.621431352937787e-07, "loss": -0.01129855215549469, "memory(GiB)": 112.53, "reward": 1.697916716337204, "reward_std": 0.17150772735476494, "rewards/CineAccuracyORM/mean": 0.6979166865348816, "rewards/CineAccuracyORM/std": 0.362364798784256, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 896, "train_speed(iter/s)": 0.029558 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 93.25, "completions/mean_length": 64.13541889190674, "completions/min_length": 41.5, "epoch": 1.3365599404318689, "grad_norm": 1.7216631138707916, "kl": 0.4853515625, "learning_rate": 9.620528136596698e-07, "loss": -0.018859628587961197, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.09750141017138958, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.2950507290661335, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 897, "train_speed(iter/s)": 0.029556 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 82.75, "completions/mean_length": 59.81250190734863, "completions/min_length": 39.25, "epoch": 1.3380491437081161, "grad_norm": 0.005630459688476034, "kl": 0.4736328125, "learning_rate": 9.619623886557757e-07, "loss": 0.00047281090519391, "memory(GiB)": 112.53, "reward": 1.5000000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 898, "train_speed(iter/s)": 0.029553 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 94.5, "completions/mean_length": 61.11458492279053, "completions/min_length": 39.0, "epoch": 1.3395383469843634, "grad_norm": 1.2893074174192132, "kl": 0.50634765625, "learning_rate": 9.61871860302326e-07, "loss": -0.003969023935496807, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 899, "train_speed(iter/s)": 0.029557 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 104.0, "completions/mean_length": 64.82291793823242, "completions/min_length": 42.25, "epoch": 1.3410275502606106, "grad_norm": 1.5506110696122708, "kl": 0.44873046875, "learning_rate": 9.617812286195736e-07, "loss": 0.015056874603033066, "memory(GiB)": 112.53, "reward": 1.7916666865348816, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.3284776881337166, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 900, "train_speed(iter/s)": 0.029551 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.0, "completions/mean_length": 63.02083396911621, "completions/min_length": 42.25, "epoch": 1.3425167535368578, "grad_norm": 1.7232153644784147, "kl": 0.4560546875, "learning_rate": 9.616904936277947e-07, "loss": 0.010105873458087444, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.12028077617287636, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.3582116588950157, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 901, "train_speed(iter/s)": 0.029555 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 89.5, "completions/mean_length": 62.61458396911621, "completions/min_length": 40.75, "epoch": 1.3440059568131049, "grad_norm": 0.005860203136595196, "kl": 0.49267578125, "learning_rate": 9.615996553472883e-07, "loss": 0.0004928281996399164, "memory(GiB)": 112.53, "reward": 1.5000000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000074505806, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 902, "train_speed(iter/s)": 0.02956 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.5, "completions/mean_length": 62.75000190734863, "completions/min_length": 36.75, "epoch": 1.345495160089352, "grad_norm": 0.7226512958834687, "kl": 0.4638671875, "learning_rate": 9.615087137983766e-07, "loss": -0.00860495213419199, "memory(GiB)": 112.53, "reward": 1.7187500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500149011612, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 903, "train_speed(iter/s)": 0.029555 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 99.25, "completions/mean_length": 63.885419845581055, "completions/min_length": 40.25, "epoch": 1.3469843633655993, "grad_norm": 0.005589021982871392, "kl": 0.42578125, "learning_rate": 9.614176690014049e-07, "loss": 0.0004258574917912483, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 904, "train_speed(iter/s)": 0.029568 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 90.5, "completions/mean_length": 59.91666889190674, "completions/min_length": 37.5, "epoch": 1.3484735666418466, "grad_norm": 0.005497757264134115, "kl": 0.474609375, "learning_rate": 9.613265209767415e-07, "loss": 0.00047424176591448486, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 905, "train_speed(iter/s)": 0.029563 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 99.0, "completions/mean_length": 61.58333492279053, "completions/min_length": 39.75, "epoch": 1.3499627699180938, "grad_norm": 1.9251201538616054, "kl": 0.49365234375, "learning_rate": 9.61235269744778e-07, "loss": -0.008227696642279625, "memory(GiB)": 112.53, "reward": 1.6458333730697632, "reward_std": 0.08311938121914864, "rewards/CineAccuracyORM/mean": 0.6458333432674408, "rewards/CineAccuracyORM/std": 0.3739768713712692, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 906, "train_speed(iter/s)": 0.029567 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 97.5, "completions/mean_length": 65.25000190734863, "completions/min_length": 40.25, "epoch": 1.351451973194341, "grad_norm": 1.8063290773546017, "kl": 0.47119140625, "learning_rate": 9.611439153259291e-07, "loss": 0.0011746002128347754, "memory(GiB)": 112.53, "reward": 1.416666716337204, "reward_std": 0.11116771958768368, "rewards/CineAccuracyORM/mean": 0.41666667722165585, "rewards/CineAccuracyORM/std": 0.44184649735689163, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 907, "train_speed(iter/s)": 0.029571 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 89.5, "completions/mean_length": 62.76041793823242, "completions/min_length": 43.5, "epoch": 1.3529411764705883, "grad_norm": 1.0825916917008267, "kl": 0.46923828125, "learning_rate": 9.610524577406323e-07, "loss": -0.008042710833251476, "memory(GiB)": 112.53, "reward": 1.6145833730697632, "reward_std": 0.06803862750530243, "rewards/CineAccuracyORM/mean": 0.6145833432674408, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 908, "train_speed(iter/s)": 0.029575 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 104.5, "completions/mean_length": 64.69791889190674, "completions/min_length": 45.5, "epoch": 1.3544303797468356, "grad_norm": 1.1218634350749512, "kl": 0.4306640625, "learning_rate": 9.609608970093486e-07, "loss": -0.006584102287888527, "memory(GiB)": 112.53, "reward": 1.5520833730697632, "reward_std": 0.07400632463395596, "rewards/CineAccuracyORM/mean": 0.5520833386108279, "rewards/CineAccuracyORM/std": 0.29910537227988243, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 909, "train_speed(iter/s)": 0.02957 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 91.75, "completions/mean_length": 60.812500953674316, "completions/min_length": 41.0, "epoch": 1.3559195830230826, "grad_norm": 1.557621005301582, "kl": 0.4990234375, "learning_rate": 9.608692331525615e-07, "loss": 0.0210355706512928, "memory(GiB)": 112.53, "reward": 1.8854166865348816, "reward_std": 0.11258216947317123, "rewards/CineAccuracyORM/mean": 0.8854166865348816, "rewards/CineAccuracyORM/std": 0.21124978363513947, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 910, "train_speed(iter/s)": 0.029566 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 95.5, "completions/mean_length": 61.37500190734863, "completions/min_length": 38.0, "epoch": 1.3574087862993298, "grad_norm": 1.3364457962755896, "kl": 0.478515625, "learning_rate": 9.60777466190778e-07, "loss": -0.009891029447317123, "memory(GiB)": 112.53, "reward": 1.6770834028720856, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.677083358168602, "rewards/CineAccuracyORM/std": 0.4703870266675949, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 911, "train_speed(iter/s)": 0.029578 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 86.0, "completions/mean_length": 61.64583396911621, "completions/min_length": 41.75, "epoch": 1.358897989575577, "grad_norm": 1.7354718948187295, "kl": 0.490234375, "learning_rate": 9.606855961445284e-07, "loss": 0.01251327432692051, "memory(GiB)": 112.53, "reward": 1.4375000298023224, "reward_std": 0.09750140458345413, "rewards/CineAccuracyORM/mean": 0.43750001303851604, "rewards/CineAccuracyORM/std": 0.36563319340348244, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 912, "train_speed(iter/s)": 0.029575 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 88.0, "completions/mean_length": 62.30208396911621, "completions/min_length": 43.0, "epoch": 1.3603871928518243, "grad_norm": 0.0064007434560023006, "kl": 0.4833984375, "learning_rate": 9.605936230343654e-07, "loss": 0.00048360321670770645, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 913, "train_speed(iter/s)": 0.029567 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 90.75, "completions/mean_length": 61.802085876464844, "completions/min_length": 39.5, "epoch": 1.3618763961280715, "grad_norm": 1.551429291285735, "kl": 0.48828125, "learning_rate": 9.60501546880865e-07, "loss": 0.001927406876347959, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.07715167850255966, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.3015497848391533, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 914, "train_speed(iter/s)": 0.029571 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 98.5, "completions/mean_length": 60.44791793823242, "completions/min_length": 41.25, "epoch": 1.3633655994043186, "grad_norm": 0.006332082352774787, "kl": 0.47021484375, "learning_rate": 9.604093677046267e-07, "loss": 0.00046989391557872295, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 915, "train_speed(iter/s)": 0.029584 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 90.75, "completions/mean_length": 61.96875286102295, "completions/min_length": 43.5, "epoch": 1.3648548026805658, "grad_norm": 1.1941976834532648, "kl": 0.45751953125, "learning_rate": 9.603170855262723e-07, "loss": -0.0053553348407149315, "memory(GiB)": 112.53, "reward": 1.7083333730697632, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7083333432674408, "rewards/CineAccuracyORM/std": 0.36846019327640533, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 916, "train_speed(iter/s)": 0.029587 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 102.25, "completions/mean_length": 61.96875190734863, "completions/min_length": 37.0, "epoch": 1.366344005956813, "grad_norm": 0.8769287405482926, "kl": 0.482421875, "learning_rate": 9.602247003664476e-07, "loss": 0.00795285589993, "memory(GiB)": 112.53, "reward": 1.6458334028720856, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6458333507180214, "rewards/CineAccuracyORM/std": 0.43174003064632416, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 917, "train_speed(iter/s)": 0.0296 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 85.5, "completions/mean_length": 62.145835876464844, "completions/min_length": 39.5, "epoch": 1.3678332092330603, "grad_norm": 0.8920885916618956, "kl": 0.48486328125, "learning_rate": 9.601322122458203e-07, "loss": -0.008699947968125343, "memory(GiB)": 112.53, "reward": 1.7187500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500149011612, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 918, "train_speed(iter/s)": 0.029596 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 99.25, "completions/mean_length": 63.81250286102295, "completions/min_length": 38.25, "epoch": 1.3693224125093075, "grad_norm": 1.3697871512480462, "kl": 0.49169921875, "learning_rate": 9.60039621185082e-07, "loss": -0.0006613782024942338, "memory(GiB)": 112.53, "reward": 1.541666716337204, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.541666679084301, "rewards/CineAccuracyORM/std": 0.4681183323264122, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 919, "train_speed(iter/s)": 0.029608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 93.0, "completions/mean_length": 63.354166984558105, "completions/min_length": 44.0, "epoch": 1.3708116157855548, "grad_norm": 1.5934895878401576, "kl": 0.52978515625, "learning_rate": 9.599469272049468e-07, "loss": 0.0023447524290531874, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.0876726396381855, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.35178712010383606, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 920, "train_speed(iter/s)": 0.029612 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 90.25, "completions/mean_length": 62.82291889190674, "completions/min_length": 40.5, "epoch": 1.372300819061802, "grad_norm": 1.6927691412420611, "kl": 0.47021484375, "learning_rate": 9.598541303261523e-07, "loss": -0.01525517925620079, "memory(GiB)": 112.53, "reward": 1.6979166865348816, "reward_std": 0.10205466859042645, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.3546011596918106, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 921, "train_speed(iter/s)": 0.029609 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 90.25, "completions/mean_length": 63.92708492279053, "completions/min_length": 42.75, "epoch": 1.3737900223380493, "grad_norm": 1.4505406662017215, "kl": 0.48583984375, "learning_rate": 9.597612305694588e-07, "loss": -0.004931551869958639, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.1171354167163372, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.35134297609329224, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 922, "train_speed(iter/s)": 0.029604 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 104.5, "completions/mean_length": 65.09375190734863, "completions/min_length": 42.75, "epoch": 1.3752792256142963, "grad_norm": 0.9626240077287112, "kl": 0.45849609375, "learning_rate": 9.596682279556498e-07, "loss": -0.016584744676947594, "memory(GiB)": 112.53, "reward": 1.760416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166865348816, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 923, "train_speed(iter/s)": 0.029597 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 91.25, "completions/mean_length": 63.89583683013916, "completions/min_length": 43.0, "epoch": 1.3767684288905435, "grad_norm": 2.6155695505329275, "kl": 0.4697265625, "learning_rate": 9.595751225055316e-07, "loss": -0.004077373072504997, "memory(GiB)": 112.53, "reward": 1.6979167461395264, "reward_std": 0.07400631904602051, "rewards/CineAccuracyORM/mean": 0.6979166865348816, "rewards/CineAccuracyORM/std": 0.464870348572731, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 924, "train_speed(iter/s)": 0.029591 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 96.0, "completions/mean_length": 61.89583492279053, "completions/min_length": 39.75, "epoch": 1.3782576321667908, "grad_norm": 1.312716892801928, "kl": 0.48291015625, "learning_rate": 9.594819142399334e-07, "loss": -0.004220190923660994, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.06803862750530243, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 925, "train_speed(iter/s)": 0.029595 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.25, "completions/mean_length": 63.947916984558105, "completions/min_length": 41.0, "epoch": 1.379746835443038, "grad_norm": 0.7539419705746333, "kl": 0.4677734375, "learning_rate": 9.593886031797081e-07, "loss": -0.006681500002741814, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833507180214, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 926, "train_speed(iter/s)": 0.02959 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.5, "completions/mean_length": 67.80208587646484, "completions/min_length": 46.75, "epoch": 1.3812360387192852, "grad_norm": 0.9163913813892792, "kl": 0.435546875, "learning_rate": 9.592951893457308e-07, "loss": 0.014522729441523552, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833507180214, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 927, "train_speed(iter/s)": 0.029593 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 99.75, "completions/mean_length": 68.27083587646484, "completions/min_length": 43.5, "epoch": 1.3827252419955323, "grad_norm": 0.937442079873498, "kl": 0.447265625, "learning_rate": 9.592016727588998e-07, "loss": -0.005905409809201956, "memory(GiB)": 112.53, "reward": 1.6770833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833358168602, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 928, "train_speed(iter/s)": 0.029597 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 106.25, "completions/mean_length": 66.42708587646484, "completions/min_length": 42.0, "epoch": 1.3842144452717795, "grad_norm": 1.3215989731824416, "kl": 0.46435546875, "learning_rate": 9.591080534401371e-07, "loss": -0.008125923573970795, "memory(GiB)": 112.53, "reward": 1.7291667461395264, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7291666865348816, "rewards/CineAccuracyORM/std": 0.43174003064632416, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 929, "train_speed(iter/s)": 0.029609 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.5, "completions/mean_length": 69.40625381469727, "completions/min_length": 46.5, "epoch": 1.3857036485480267, "grad_norm": 0.9215177521344338, "kl": 0.4541015625, "learning_rate": 9.590143314103862e-07, "loss": 0.012701043859124184, "memory(GiB)": 112.53, "reward": 1.4583333730697632, "reward_std": 0.0629940778017044, "rewards/CineAccuracyORM/mean": 0.4687500149011612, "rewards/CineAccuracyORM/std": 0.4884019047021866, "rewards/Format/mean": 0.9895833432674408, "rewards/Format/std": 0.05103103443980217, "step": 930, "train_speed(iter/s)": 0.029605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.0, "completions/mean_length": 69.78125286102295, "completions/min_length": 45.25, "epoch": 1.387192851824274, "grad_norm": 0.9791117657747086, "kl": 0.435546875, "learning_rate": 9.589205066906153e-07, "loss": -0.0013262963620945811, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 931, "train_speed(iter/s)": 0.029609 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 104.0, "completions/mean_length": 68.08333683013916, "completions/min_length": 44.25, "epoch": 1.3886820551005212, "grad_norm": 0.8157402346628111, "kl": 0.46337890625, "learning_rate": 9.58826579301814e-07, "loss": -0.0032939701341092587, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 932, "train_speed(iter/s)": 0.029601 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 97.75, "completions/mean_length": 65.14583396911621, "completions/min_length": 41.75, "epoch": 1.3901712583767685, "grad_norm": 1.3701544183432344, "kl": 0.4912109375, "learning_rate": 9.58732549264996e-07, "loss": -0.007988279685378075, "memory(GiB)": 112.53, "reward": 1.8437500298023224, "reward_std": 0.0876726396381855, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.30001722276210785, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 933, "train_speed(iter/s)": 0.029604 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 99.5, "completions/mean_length": 64.53125190734863, "completions/min_length": 41.25, "epoch": 1.3916604616530157, "grad_norm": 0.585745198202054, "kl": 0.48388671875, "learning_rate": 9.586384166011976e-07, "loss": -0.00918412022292614, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 934, "train_speed(iter/s)": 0.029616 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.5, "completions/mean_length": 68.04166984558105, "completions/min_length": 40.75, "epoch": 1.393149664929263, "grad_norm": 0.007781977081642315, "kl": 0.46923828125, "learning_rate": 9.58544181331478e-07, "loss": 0.0004690834030043334, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 935, "train_speed(iter/s)": 0.029603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 97.75, "completions/mean_length": 66.48958492279053, "completions/min_length": 42.75, "epoch": 1.39463886820551, "grad_norm": 1.0242491284981616, "kl": 0.47509765625, "learning_rate": 9.58449843476919e-07, "loss": -0.005422198213636875, "memory(GiB)": 112.53, "reward": 1.7812500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7812500074505806, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 936, "train_speed(iter/s)": 0.029589 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 98.75, "completions/mean_length": 69.06250190734863, "completions/min_length": 45.0, "epoch": 1.3961280714817572, "grad_norm": 1.7997785584580046, "kl": 0.46240234375, "learning_rate": 9.583554030586262e-07, "loss": -0.016512732952833176, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.08838834799826145, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 937, "train_speed(iter/s)": 0.029584 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 96.5, "completions/mean_length": 67.18750286102295, "completions/min_length": 45.5, "epoch": 1.3976172747580045, "grad_norm": 1.5016023533076526, "kl": 0.45751953125, "learning_rate": 9.582608600977274e-07, "loss": 0.000983542064204812, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 938, "train_speed(iter/s)": 0.029589 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 95.75, "completions/mean_length": 66.26041889190674, "completions/min_length": 44.5, "epoch": 1.3991064780342517, "grad_norm": 2.3637759870121475, "kl": 0.44677734375, "learning_rate": 9.58166214615374e-07, "loss": 0.008372282609343529, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.11116771958768368, "rewards/CineAccuracyORM/mean": 0.5833333386108279, "rewards/CineAccuracyORM/std": 0.2888568378984928, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 939, "train_speed(iter/s)": 0.029591 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 101.5, "completions/mean_length": 69.44792079925537, "completions/min_length": 42.75, "epoch": 1.400595681310499, "grad_norm": 0.00766693492649146, "kl": 0.47119140625, "learning_rate": 9.580714666327394e-07, "loss": 0.0004710272769443691, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 940, "train_speed(iter/s)": 0.02958 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 108.0, "completions/mean_length": 72.03125190734863, "completions/min_length": 48.25, "epoch": 1.402084884586746, "grad_norm": 0.8010696875696448, "kl": 0.466796875, "learning_rate": 9.579766161710207e-07, "loss": 0.004639989230781794, "memory(GiB)": 112.53, "reward": 1.8541666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.12590253353118896, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 941, "train_speed(iter/s)": 0.029569 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 95.75, "completions/mean_length": 67.65625381469727, "completions/min_length": 46.0, "epoch": 1.4035740878629932, "grad_norm": 0.6741709641838847, "kl": 0.46826171875, "learning_rate": 9.57881663251438e-07, "loss": 0.0016112083103507757, "memory(GiB)": 112.53, "reward": 1.6458334028720856, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.645833358168602, "rewards/CineAccuracyORM/std": 0.4870600998401642, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 942, "train_speed(iter/s)": 0.029564 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 98.25, "completions/mean_length": 69.21875190734863, "completions/min_length": 44.0, "epoch": 1.4050632911392404, "grad_norm": 0.790062704095768, "kl": 0.447265625, "learning_rate": 9.577866078952335e-07, "loss": -0.0020157780963927507, "memory(GiB)": 112.53, "reward": 1.5937500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500223517418, "rewards/CineAccuracyORM/std": 0.47723397612571716, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 943, "train_speed(iter/s)": 0.029568 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.25, "completions/mean_length": 68.58333778381348, "completions/min_length": 41.5, "epoch": 1.4065524944154877, "grad_norm": 1.0382345494709737, "kl": 0.45068359375, "learning_rate": 9.576914501236732e-07, "loss": 0.0014002774842083454, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 944, "train_speed(iter/s)": 0.029572 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 107.5, "completions/mean_length": 72.73958587646484, "completions/min_length": 50.25, "epoch": 1.408041697691735, "grad_norm": 0.006125922176539128, "kl": 0.4384765625, "learning_rate": 9.575961899580457e-07, "loss": 0.000437782087828964, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 945, "train_speed(iter/s)": 0.02957 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 106.75, "completions/mean_length": 70.96875381469727, "completions/min_length": 47.0, "epoch": 1.4095309009679822, "grad_norm": 0.7334660476780464, "kl": 0.46142578125, "learning_rate": 9.575008274196623e-07, "loss": 0.00436636246740818, "memory(GiB)": 112.53, "reward": 1.8125000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 946, "train_speed(iter/s)": 0.029574 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 106.5, "completions/mean_length": 68.14583396911621, "completions/min_length": 45.75, "epoch": 1.4110201042442294, "grad_norm": 0.0070778070413648544, "kl": 0.4765625, "learning_rate": 9.574053625298575e-07, "loss": 0.00047666969476267695, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 947, "train_speed(iter/s)": 0.029569 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.0, "completions/mean_length": 70.31250190734863, "completions/min_length": 44.75, "epoch": 1.4125093075204767, "grad_norm": 0.006939956593885968, "kl": 0.45849609375, "learning_rate": 9.573097953099884e-07, "loss": 0.00045748380944132805, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 948, "train_speed(iter/s)": 0.029581 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 108.0, "completions/mean_length": 71.00000190734863, "completions/min_length": 48.0, "epoch": 1.4139985107967237, "grad_norm": 2.124780078917527, "kl": 0.44677734375, "learning_rate": 9.572141257814351e-07, "loss": -0.007206979673355818, "memory(GiB)": 112.53, "reward": 1.5312500298023224, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.5312500149011612, "rewards/CineAccuracyORM/std": 0.3307463824748993, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 949, "train_speed(iter/s)": 0.029585 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 105.75, "completions/mean_length": 70.00000381469727, "completions/min_length": 46.0, "epoch": 1.415487714072971, "grad_norm": 1.9046144513899395, "kl": 0.46142578125, "learning_rate": 9.57118353965601e-07, "loss": 0.009021485224366188, "memory(GiB)": 112.53, "reward": 1.6562500298023224, "reward_std": 0.0876726396381855, "rewards/CineAccuracyORM/mean": 0.6562500074505806, "rewards/CineAccuracyORM/std": 0.3753186762332916, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 950, "train_speed(iter/s)": 0.029589 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.0, "completions/mean_length": 71.67708778381348, "completions/min_length": 44.0, "epoch": 1.4169769173492182, "grad_norm": 1.1146909979422897, "kl": 0.47509765625, "learning_rate": 9.570224798839117e-07, "loss": 0.006274989806115627, "memory(GiB)": 112.53, "reward": 1.6250000298023224, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.6250000074505806, "rewards/CineAccuracyORM/std": 0.36846019327640533, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 951, "train_speed(iter/s)": 0.029581 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.25, "completions/mean_length": 66.01041793823242, "completions/min_length": 41.75, "epoch": 1.4184661206254654, "grad_norm": 1.229974363597384, "kl": 0.4580078125, "learning_rate": 9.569265035578163e-07, "loss": -0.007128878962248564, "memory(GiB)": 112.53, "reward": 1.9895833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9895833432674408, "rewards/CineAccuracyORM/std": 0.05103103443980217, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 952, "train_speed(iter/s)": 0.02958 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.0, "completions/mean_length": 71.02083587646484, "completions/min_length": 50.0, "epoch": 1.4199553239017126, "grad_norm": 1.1167053994411222, "kl": 0.44970703125, "learning_rate": 9.56830425008786e-07, "loss": 0.007657000329345465, "memory(GiB)": 112.53, "reward": 1.8958333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 953, "train_speed(iter/s)": 0.029591 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.75, "completions/mean_length": 72.53125190734863, "completions/min_length": 48.25, "epoch": 1.4214445271779597, "grad_norm": 0.7100110281350355, "kl": 0.45556640625, "learning_rate": 9.567342442583163e-07, "loss": -0.002363643143326044, "memory(GiB)": 112.53, "reward": 1.5937500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500223517418, "rewards/CineAccuracyORM/std": 0.47723397612571716, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 954, "train_speed(iter/s)": 0.02959 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.75, "completions/mean_length": 72.47916793823242, "completions/min_length": 47.5, "epoch": 1.422933730454207, "grad_norm": 0.006440943381345145, "kl": 0.455078125, "learning_rate": 9.566379613279233e-07, "loss": 0.0004556688363663852, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 955, "train_speed(iter/s)": 0.029602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 99.25, "completions/mean_length": 70.47916793823242, "completions/min_length": 48.75, "epoch": 1.4244229337304541, "grad_norm": 0.8205761939227146, "kl": 0.45654296875, "learning_rate": 9.565415762391484e-07, "loss": 0.00429933937266469, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833386108279, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 956, "train_speed(iter/s)": 0.029607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.5, "completions/mean_length": 70.43750381469727, "completions/min_length": 43.5, "epoch": 1.4259121370067014, "grad_norm": 0.007209095166445115, "kl": 0.4765625, "learning_rate": 9.564450890135543e-07, "loss": 0.00047674644156359136, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 957, "train_speed(iter/s)": 0.029619 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.75, "completions/mean_length": 71.12500190734863, "completions/min_length": 45.25, "epoch": 1.4274013402829486, "grad_norm": 0.9134777253187855, "kl": 0.46484375, "learning_rate": 9.563484996727268e-07, "loss": 0.015356531366705894, "memory(GiB)": 112.53, "reward": 1.8958333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 958, "train_speed(iter/s)": 0.029623 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 104.0, "completions/mean_length": 70.31250190734863, "completions/min_length": 50.0, "epoch": 1.4288905435591959, "grad_norm": 1.2699224428174711, "kl": 0.47119140625, "learning_rate": 9.56251808238275e-07, "loss": 0.0003373246581759304, "memory(GiB)": 112.53, "reward": 1.7812500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7812500074505806, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 959, "train_speed(iter/s)": 0.029601 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 105.75, "completions/mean_length": 69.07291984558105, "completions/min_length": 47.0, "epoch": 1.4303797468354431, "grad_norm": 1.13508167311763, "kl": 0.47314453125, "learning_rate": 9.561550147318305e-07, "loss": 0.007095588371157646, "memory(GiB)": 112.53, "reward": 1.635416716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.635416679084301, "rewards/CineAccuracyORM/std": 0.34448449313640594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 960, "train_speed(iter/s)": 0.029612 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.75, "completions/mean_length": 68.42708396911621, "completions/min_length": 45.75, "epoch": 1.4318689501116904, "grad_norm": 0.6896068000476634, "kl": 0.43359375, "learning_rate": 9.560581191750477e-07, "loss": 0.007963860407471657, "memory(GiB)": 112.53, "reward": 1.4895833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.4895833432674408, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 961, "train_speed(iter/s)": 0.029624 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 105.5, "completions/mean_length": 68.08333492279053, "completions/min_length": 43.0, "epoch": 1.4333581533879374, "grad_norm": 0.005837139517258442, "kl": 0.45849609375, "learning_rate": 9.559611215896039e-07, "loss": 0.000457898189779371, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 962, "train_speed(iter/s)": 0.029632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 104.75, "completions/mean_length": 69.40625095367432, "completions/min_length": 45.0, "epoch": 1.4348473566641846, "grad_norm": 0.006219887221174672, "kl": 0.4580078125, "learning_rate": 9.558640219971997e-07, "loss": 0.00045794405741617084, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 963, "train_speed(iter/s)": 0.029636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.25, "completions/mean_length": 66.89583587646484, "completions/min_length": 43.0, "epoch": 1.4363365599404319, "grad_norm": 1.2823648266793835, "kl": 0.4619140625, "learning_rate": 9.557668204195574e-07, "loss": -0.0015780137619003654, "memory(GiB)": 112.53, "reward": 1.5520833730697632, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.5520833414047956, "rewards/CineAccuracyORM/std": 0.3182126581668854, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 964, "train_speed(iter/s)": 0.029647 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.0, "completions/mean_length": 72.90625381469727, "completions/min_length": 47.75, "epoch": 1.437825763216679, "grad_norm": 1.3294634607515254, "kl": 0.44775390625, "learning_rate": 9.556695168784235e-07, "loss": -0.0023232637904584408, "memory(GiB)": 112.53, "reward": 1.6979166865348816, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.697916679084301, "rewards/CineAccuracyORM/std": 0.3470437303185463, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 965, "train_speed(iter/s)": 0.029636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 101.25, "completions/mean_length": 68.23958587646484, "completions/min_length": 42.0, "epoch": 1.4393149664929263, "grad_norm": 0.006480190963781538, "kl": 0.4921875, "learning_rate": 9.55572111395566e-07, "loss": 0.0004921153886243701, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 966, "train_speed(iter/s)": 0.029623 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 95.0, "completions/mean_length": 67.78125095367432, "completions/min_length": 45.25, "epoch": 1.4408041697691734, "grad_norm": 0.006225143038434591, "kl": 0.45556640625, "learning_rate": 9.554746039927767e-07, "loss": 0.0004558642103802413, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 967, "train_speed(iter/s)": 0.029626 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 98.25, "completions/mean_length": 63.94791889190674, "completions/min_length": 38.0, "epoch": 1.4422933730454206, "grad_norm": 0.006979879349351627, "kl": 0.44970703125, "learning_rate": 9.553769946918698e-07, "loss": 0.0004505404212977737, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 968, "train_speed(iter/s)": 0.02963 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 106.75, "completions/mean_length": 68.66666984558105, "completions/min_length": 45.25, "epoch": 1.4437825763216678, "grad_norm": 0.006155925720941595, "kl": 0.4560546875, "learning_rate": 9.55279283514682e-07, "loss": 0.0004556730855256319, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 969, "train_speed(iter/s)": 0.029642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 98.75, "completions/mean_length": 69.38541793823242, "completions/min_length": 46.0, "epoch": 1.445271779597915, "grad_norm": 1.6757203346256262, "kl": 0.45556640625, "learning_rate": 9.551814704830734e-07, "loss": 0.0070162671618163586, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.09750140458345413, "rewards/CineAccuracyORM/mean": 0.7500000298023224, "rewards/CineAccuracyORM/std": 0.3483504578471184, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 970, "train_speed(iter/s)": 0.029647 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.5, "completions/mean_length": 66.34375095367432, "completions/min_length": 44.0, "epoch": 1.4467609828741623, "grad_norm": 0.7986820912643403, "kl": 0.4931640625, "learning_rate": 9.550835556189263e-07, "loss": -0.0013836706057190895, "memory(GiB)": 112.53, "reward": 1.7604166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.760416679084301, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 971, "train_speed(iter/s)": 0.029642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 102.75, "completions/mean_length": 70.69791793823242, "completions/min_length": 44.0, "epoch": 1.4482501861504096, "grad_norm": 0.8969197925097191, "kl": 0.435546875, "learning_rate": 9.549855389441464e-07, "loss": 0.0031400432344526052, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 972, "train_speed(iter/s)": 0.029644 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 99.0, "completions/mean_length": 66.09375381469727, "completions/min_length": 41.0, "epoch": 1.4497393894266568, "grad_norm": 1.5146489104213348, "kl": 0.48095703125, "learning_rate": 9.548874204806618e-07, "loss": -0.00512265507131815, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 973, "train_speed(iter/s)": 0.029627 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.75, "completions/mean_length": 68.19791984558105, "completions/min_length": 48.75, "epoch": 1.451228592702904, "grad_norm": 1.2634036861384579, "kl": 0.4521484375, "learning_rate": 9.547892002504231e-07, "loss": 0.0025950074195861816, "memory(GiB)": 112.53, "reward": 1.5625000596046448, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.5625000149011612, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 974, "train_speed(iter/s)": 0.029622 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 98.75, "completions/mean_length": 66.83333587646484, "completions/min_length": 46.75, "epoch": 1.452717795979151, "grad_norm": 0.005958499426131517, "kl": 0.45556640625, "learning_rate": 9.546908782754044e-07, "loss": 0.00045583321480080485, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 975, "train_speed(iter/s)": 0.029624 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.5, "completions/mean_length": 65.58333587646484, "completions/min_length": 39.5, "epoch": 1.4542069992553983, "grad_norm": 0.006385401367125602, "kl": 0.48193359375, "learning_rate": 9.54592454577602e-07, "loss": 0.00048135159886442125, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 976, "train_speed(iter/s)": 0.029629 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 97.5, "completions/mean_length": 65.46875286102295, "completions/min_length": 43.75, "epoch": 1.4556962025316456, "grad_norm": 2.246586939313106, "kl": 0.4755859375, "learning_rate": 9.54493929179035e-07, "loss": 0.010857382789254189, "memory(GiB)": 112.53, "reward": 1.729166716337204, "reward_std": 0.1308017335832119, "rewards/CineAccuracyORM/mean": 0.7291666865348816, "rewards/CineAccuracyORM/std": 0.44130611419677734, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 977, "train_speed(iter/s)": 0.02963 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 99.0, "completions/mean_length": 66.48958587646484, "completions/min_length": 47.5, "epoch": 1.4571854058078928, "grad_norm": 0.0061936284765419765, "kl": 0.42919921875, "learning_rate": 9.543953021017454e-07, "loss": 0.00042910510092042387, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 978, "train_speed(iter/s)": 0.029641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.25, "completions/mean_length": 68.28125190734863, "completions/min_length": 41.0, "epoch": 1.45867460908414, "grad_norm": 2.5932688456657846, "kl": 0.4404296875, "learning_rate": 9.54296573367798e-07, "loss": 0.018705174326896667, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.1497435588389635, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.40851055085659027, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 979, "train_speed(iter/s)": 0.029644 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 92.0, "completions/mean_length": 61.17708396911621, "completions/min_length": 40.5, "epoch": 1.460163812360387, "grad_norm": 2.644382440995591, "kl": 0.48388671875, "learning_rate": 9.5419774299928e-07, "loss": -0.005943982861936092, "memory(GiB)": 112.53, "reward": 1.6458333730697632, "reward_std": 0.08311938121914864, "rewards/CineAccuracyORM/mean": 0.6458333432674408, "rewards/CineAccuracyORM/std": 0.3739768713712692, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 980, "train_speed(iter/s)": 0.029635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 101.75, "completions/mean_length": 64.76041889190674, "completions/min_length": 40.25, "epoch": 1.4616530156366343, "grad_norm": 1.1019217652880282, "kl": 0.49072265625, "learning_rate": 9.540988110183022e-07, "loss": 0.007561097852885723, "memory(GiB)": 112.53, "reward": 1.6145834028720856, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6145833507180214, "rewards/CineAccuracyORM/std": 0.464870348572731, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 981, "train_speed(iter/s)": 0.02963 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 87.0, "completions/mean_length": 61.26041793823242, "completions/min_length": 38.75, "epoch": 1.4631422189128815, "grad_norm": 0.007762765946812152, "kl": 0.4892578125, "learning_rate": 9.539997774469967e-07, "loss": 0.0004899228224530816, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 982, "train_speed(iter/s)": 0.029631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 98.25, "completions/mean_length": 67.11458778381348, "completions/min_length": 46.0, "epoch": 1.4646314221891288, "grad_norm": 2.696085434987925, "kl": 0.4541015625, "learning_rate": 9.539006423075198e-07, "loss": -0.0077204834669828415, "memory(GiB)": 112.53, "reward": 1.7916666865348816, "reward_std": 0.14204495213925838, "rewards/CineAccuracyORM/mean": 0.7916666865348816, "rewards/CineAccuracyORM/std": 0.3348836228251457, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 983, "train_speed(iter/s)": 0.029643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 97.0, "completions/mean_length": 65.09375381469727, "completions/min_length": 47.25, "epoch": 1.466120625465376, "grad_norm": 0.8313098217690554, "kl": 0.46875, "learning_rate": 9.538014056220493e-07, "loss": 0.00046885127085261047, "memory(GiB)": 112.53, "reward": 1.7291667461395264, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7291666865348816, "rewards/CineAccuracyORM/std": 0.43174003064632416, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 984, "train_speed(iter/s)": 0.029629 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 98.75, "completions/mean_length": 64.89583396911621, "completions/min_length": 43.5, "epoch": 1.4676098287416233, "grad_norm": 0.007081412168860864, "kl": 0.46875, "learning_rate": 9.53702067412787e-07, "loss": 0.0004688226617872715, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 985, "train_speed(iter/s)": 0.029632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 101.0, "completions/mean_length": 66.71875095367432, "completions/min_length": 45.0, "epoch": 1.4690990320178705, "grad_norm": 0.8790814873365915, "kl": 0.4404296875, "learning_rate": 9.53602627701956e-07, "loss": 0.0012723022373393178, "memory(GiB)": 112.53, "reward": 1.541666716337204, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.5416666865348816, "rewards/CineAccuracyORM/std": 0.45633094012737274, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 986, "train_speed(iter/s)": 0.029636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 88.75, "completions/mean_length": 65.60416984558105, "completions/min_length": 43.25, "epoch": 1.4705882352941178, "grad_norm": 1.660741083634962, "kl": 0.4921875, "learning_rate": 9.535030865118032e-07, "loss": -0.004963994026184082, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.0876726396381855, "rewards/CineAccuracyORM/mean": 0.6770833432674408, "rewards/CineAccuracyORM/std": 0.3753186762332916, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 987, "train_speed(iter/s)": 0.029648 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 107.0, "completions/mean_length": 66.72916984558105, "completions/min_length": 44.25, "epoch": 1.472077438570365, "grad_norm": 0.7818133155029614, "kl": 0.478515625, "learning_rate": 9.534034438645977e-07, "loss": 0.005557069554924965, "memory(GiB)": 112.53, "reward": 1.760416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166865348816, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 988, "train_speed(iter/s)": 0.029652 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 93.25, "completions/mean_length": 62.38541793823242, "completions/min_length": 40.5, "epoch": 1.473566641846612, "grad_norm": 0.6649116970302317, "kl": 0.48486328125, "learning_rate": 9.533036997826314e-07, "loss": -0.007842972874641418, "memory(GiB)": 112.53, "reward": 1.6562500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500223517418, "rewards/CineAccuracyORM/std": 0.4121886007487774, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 989, "train_speed(iter/s)": 0.029664 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.5, "completions/mean_length": 65.33333396911621, "completions/min_length": 40.25, "epoch": 1.4750558451228593, "grad_norm": 1.3204543722591284, "kl": 0.482421875, "learning_rate": 9.532038542882189e-07, "loss": 0.0007638821844011545, "memory(GiB)": 112.53, "reward": 1.791666716337204, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7916666865348816, "rewards/CineAccuracyORM/std": 0.33594508469104767, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 990, "train_speed(iter/s)": 0.029667 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 97.75, "completions/mean_length": 67.8125, "completions/min_length": 47.75, "epoch": 1.4765450483991065, "grad_norm": 1.7274235753152356, "kl": 0.45751953125, "learning_rate": 9.531039074036976e-07, "loss": 0.002857046201825142, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.3676535338163376, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 991, "train_speed(iter/s)": 0.029662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 91.5, "completions/mean_length": 62.281250953674316, "completions/min_length": 45.5, "epoch": 1.4780342516753537, "grad_norm": 1.0541997865481416, "kl": 0.48779296875, "learning_rate": 9.530038591514273e-07, "loss": -0.011959592811763287, "memory(GiB)": 112.53, "reward": 1.5937500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500223517418, "rewards/CineAccuracyORM/std": 0.47723397612571716, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 992, "train_speed(iter/s)": 0.029666 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 93.25, "completions/mean_length": 64.41666889190674, "completions/min_length": 45.0, "epoch": 1.4795234549516008, "grad_norm": 0.8756366114096834, "kl": 0.48095703125, "learning_rate": 9.529037095537908e-07, "loss": -0.005572163965553045, "memory(GiB)": 112.53, "reward": 1.6979166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.20484384894371033, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 993, "train_speed(iter/s)": 0.02967 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 96.0, "completions/mean_length": 65.26041984558105, "completions/min_length": 44.25, "epoch": 1.481012658227848, "grad_norm": 1.7923089154295295, "kl": 0.4453125, "learning_rate": 9.528034586331934e-07, "loss": -0.004138474818319082, "memory(GiB)": 112.53, "reward": 1.635416716337204, "reward_std": 0.07400632463395596, "rewards/CineAccuracyORM/mean": 0.6354166939854622, "rewards/CineAccuracyORM/std": 0.4520214945077896, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 994, "train_speed(iter/s)": 0.029677 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 94.25, "completions/mean_length": 64.86458587646484, "completions/min_length": 44.25, "epoch": 1.4825018615040952, "grad_norm": 0.006108934636009535, "kl": 0.47314453125, "learning_rate": 9.52703106412063e-07, "loss": 0.0004732540110126138, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 995, "train_speed(iter/s)": 0.029675 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 96.25, "completions/mean_length": 64.29166889190674, "completions/min_length": 44.5, "epoch": 1.4839910647803425, "grad_norm": 1.2457765831264898, "kl": 0.46728515625, "learning_rate": 9.526026529128504e-07, "loss": 0.007985908538103104, "memory(GiB)": 112.53, "reward": 1.7083333730697632, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7083333432674408, "rewards/CineAccuracyORM/std": 0.36846019327640533, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 996, "train_speed(iter/s)": 0.029686 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.25, "completions/mean_length": 68.78125190734863, "completions/min_length": 44.0, "epoch": 1.4854802680565897, "grad_norm": 0.8596898308248572, "kl": 0.443359375, "learning_rate": 9.525020981580287e-07, "loss": -0.009649247862398624, "memory(GiB)": 112.53, "reward": 1.6562500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500074505806, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 997, "train_speed(iter/s)": 0.029683 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 92.0, "completions/mean_length": 68.13541984558105, "completions/min_length": 42.75, "epoch": 1.486969471332837, "grad_norm": 0.7734148024762988, "kl": 0.4599609375, "learning_rate": 9.524014421700941e-07, "loss": 0.00159303261898458, "memory(GiB)": 112.53, "reward": 1.6562500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500074505806, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 998, "train_speed(iter/s)": 0.02968 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 93.25, "completions/mean_length": 65.13542079925537, "completions/min_length": 44.5, "epoch": 1.4884586746090842, "grad_norm": 1.4928422846244753, "kl": 0.45703125, "learning_rate": 9.52300684971565e-07, "loss": 0.014619852416217327, "memory(GiB)": 112.53, "reward": 1.697916716337204, "reward_std": 0.07400632463395596, "rewards/CineAccuracyORM/mean": 0.6979166865348816, "rewards/CineAccuracyORM/std": 0.4595789238810539, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 999, "train_speed(iter/s)": 0.029684 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 108.5, "completions/mean_length": 68.05208492279053, "completions/min_length": 43.0, "epoch": 1.4899478778853315, "grad_norm": 1.9095873153916918, "kl": 0.462890625, "learning_rate": 9.521998265849827e-07, "loss": -0.0056807431392371655, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1000, "train_speed(iter/s)": 0.029687 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 108.0, "completions/mean_length": 68.20833587646484, "completions/min_length": 43.75, "epoch": 1.4914370811615787, "grad_norm": 0.005862691486166664, "kl": 0.4609375, "learning_rate": 9.520988670329113e-07, "loss": 0.0004605133435688913, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1001, "train_speed(iter/s)": 0.029638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 108.75, "completions/mean_length": 71.14583587646484, "completions/min_length": 45.75, "epoch": 1.4929262844378257, "grad_norm": 0.006698986557192588, "kl": 0.44482421875, "learning_rate": 9.519978063379371e-07, "loss": 0.0004445922968443483, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1002, "train_speed(iter/s)": 0.029643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.75, "completions/mean_length": 69.12500381469727, "completions/min_length": 47.5, "epoch": 1.494415487714073, "grad_norm": 1.0168760208011984, "kl": 0.42822265625, "learning_rate": 9.518966445226691e-07, "loss": -0.00013214217324275523, "memory(GiB)": 112.53, "reward": 1.5625000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.5625000149011612, "rewards/CineAccuracyORM/std": 0.43174003064632416, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1003, "train_speed(iter/s)": 0.029622 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 107.25, "completions/mean_length": 71.02083587646484, "completions/min_length": 49.5, "epoch": 1.4959046909903202, "grad_norm": 0.005240435510015929, "kl": 0.41162109375, "learning_rate": 9.517953816097395e-07, "loss": 0.00041100766975432634, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1004, "train_speed(iter/s)": 0.029633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.5, "completions/mean_length": 73.40625190734863, "completions/min_length": 48.0, "epoch": 1.4973938942665674, "grad_norm": 1.0985719149656068, "kl": 0.42431640625, "learning_rate": 9.516940176218025e-07, "loss": 0.004023757763206959, "memory(GiB)": 112.53, "reward": 1.8541666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1005, "train_speed(iter/s)": 0.029618 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 106.75, "completions/mean_length": 74.09375381469727, "completions/min_length": 51.25, "epoch": 1.4988830975428145, "grad_norm": 2.508456919184845, "kl": 0.42431640625, "learning_rate": 9.51592552581535e-07, "loss": -0.004586396273225546, "memory(GiB)": 112.53, "reward": 1.8125000596046448, "reward_std": 0.15571126714348793, "rewards/CineAccuracyORM/mean": 0.8125000298023224, "rewards/CineAccuracyORM/std": 0.38131506741046906, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1006, "train_speed(iter/s)": 0.029617 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 108.25, "completions/mean_length": 72.61458396911621, "completions/min_length": 47.0, "epoch": 1.5003723008190617, "grad_norm": 0.006980149281188453, "kl": 0.42822265625, "learning_rate": 9.514909865116367e-07, "loss": 0.0004284114111214876, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1007, "train_speed(iter/s)": 0.029616 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.0, "completions/mean_length": 68.96875190734863, "completions/min_length": 44.5, "epoch": 1.501861504095309, "grad_norm": 1.2493510435157387, "kl": 0.43115234375, "learning_rate": 9.513893194348299e-07, "loss": 0.019939731806516647, "memory(GiB)": 112.53, "reward": 1.572916716337204, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.572916679084301, "rewards/CineAccuracyORM/std": 0.45113223791122437, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1008, "train_speed(iter/s)": 0.029628 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.25, "completions/mean_length": 70.83333587646484, "completions/min_length": 48.25, "epoch": 1.5033507073715562, "grad_norm": 0.5977529995472506, "kl": 0.4267578125, "learning_rate": 9.512875513738592e-07, "loss": -0.003117080545052886, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1009, "train_speed(iter/s)": 0.029626 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.0, "completions/mean_length": 72.44791984558105, "completions/min_length": 51.5, "epoch": 1.5048399106478034, "grad_norm": 0.7668983581812885, "kl": 0.43603515625, "learning_rate": 9.511856823514922e-07, "loss": -0.007657572161406279, "memory(GiB)": 112.53, "reward": 1.4895833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.4895833432674408, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1010, "train_speed(iter/s)": 0.029628 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.25, "completions/mean_length": 75.35416793823242, "completions/min_length": 47.25, "epoch": 1.5063291139240507, "grad_norm": 1.475410356890376, "kl": 0.423828125, "learning_rate": 9.510837123905189e-07, "loss": -0.0021954714320600033, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.0725918859243393, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1011, "train_speed(iter/s)": 0.029631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 99.75, "completions/mean_length": 68.61458587646484, "completions/min_length": 44.25, "epoch": 1.507818317200298, "grad_norm": 1.5356731476725085, "kl": 0.43701171875, "learning_rate": 9.509816415137516e-07, "loss": -0.0024610678665339947, "memory(GiB)": 112.53, "reward": 1.7083333730697632, "reward_std": 0.0725918859243393, "rewards/CineAccuracyORM/mean": 0.7083333395421505, "rewards/CineAccuracyORM/std": 0.2155592292547226, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1012, "train_speed(iter/s)": 0.029626 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.5, "completions/mean_length": 72.59375190734863, "completions/min_length": 50.25, "epoch": 1.5093075204765452, "grad_norm": 0.007781950537459593, "kl": 0.42822265625, "learning_rate": 9.508794697440256e-07, "loss": 0.0004279401618987322, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1013, "train_speed(iter/s)": 0.02962 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 104.5, "completions/mean_length": 71.12500190734863, "completions/min_length": 50.0, "epoch": 1.5107967237527924, "grad_norm": 1.2605924892006257, "kl": 0.5048828125, "learning_rate": 9.507771971041989e-07, "loss": -0.001588099985383451, "memory(GiB)": 112.53, "reward": 1.8750000596046448, "reward_std": 0.07715167850255966, "rewards/CineAccuracyORM/mean": 0.8750000149011612, "rewards/CineAccuracyORM/std": 0.26155078411102295, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1014, "train_speed(iter/s)": 0.029623 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 102.25, "completions/mean_length": 71.11458778381348, "completions/min_length": 47.5, "epoch": 1.5122859270290394, "grad_norm": 0.005141066128271136, "kl": 0.431640625, "learning_rate": 9.506748236171514e-07, "loss": 0.00043109478428959846, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1015, "train_speed(iter/s)": 0.029627 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 104.25, "completions/mean_length": 72.10416984558105, "completions/min_length": 48.0, "epoch": 1.5137751303052867, "grad_norm": 0.005897787610999925, "kl": 0.4267578125, "learning_rate": 9.505723493057861e-07, "loss": 0.00042734091402962804, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1016, "train_speed(iter/s)": 0.02963 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 107.5, "completions/mean_length": 76.82291793823242, "completions/min_length": 49.0, "epoch": 1.515264333581534, "grad_norm": 0.8871673840153972, "kl": 0.42626953125, "learning_rate": 9.504697741930283e-07, "loss": -0.00979770440608263, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1017, "train_speed(iter/s)": 0.029624 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 105.25, "completions/mean_length": 72.64583778381348, "completions/min_length": 46.75, "epoch": 1.5167535368577811, "grad_norm": 1.2024526787247822, "kl": 0.4111328125, "learning_rate": 9.503670983018262e-07, "loss": 0.004896141588687897, "memory(GiB)": 112.53, "reward": 1.6562500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500149011612, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1018, "train_speed(iter/s)": 0.029636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.0, "completions/mean_length": 75.15625190734863, "completions/min_length": 53.25, "epoch": 1.5182427401340282, "grad_norm": 0.7182902397156148, "kl": 0.40283203125, "learning_rate": 9.502643216551501e-07, "loss": 0.004789971746504307, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1019, "train_speed(iter/s)": 0.029639 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 102.0, "completions/mean_length": 72.39583587646484, "completions/min_length": 53.25, "epoch": 1.5197319434102754, "grad_norm": 0.0057922919467863045, "kl": 0.41162109375, "learning_rate": 9.501614442759931e-07, "loss": 0.0004114179755561054, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1020, "train_speed(iter/s)": 0.02964 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.0, "completions/mean_length": 70.97916793823242, "completions/min_length": 47.0, "epoch": 1.5212211466865226, "grad_norm": 1.7344954804661912, "kl": 0.42041015625, "learning_rate": 9.500584661873707e-07, "loss": 0.0030349406879395247, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.07715167850255966, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.42193564027547836, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1021, "train_speed(iter/s)": 0.029643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 107.0, "completions/mean_length": 73.62500190734863, "completions/min_length": 48.5, "epoch": 1.5227103499627699, "grad_norm": 2.531093952268486, "kl": 0.42529296875, "learning_rate": 9.499553874123212e-07, "loss": -0.0071932184509932995, "memory(GiB)": 112.53, "reward": 1.4583333730697632, "reward_std": 0.07715167850255966, "rewards/CineAccuracyORM/mean": 0.4583333432674408, "rewards/CineAccuracyORM/std": 0.3568698540329933, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1022, "train_speed(iter/s)": 0.029647 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 107.75, "completions/mean_length": 75.40625381469727, "completions/min_length": 52.75, "epoch": 1.5241995532390171, "grad_norm": 1.397117164681016, "kl": 0.42431640625, "learning_rate": 9.498522079739051e-07, "loss": 0.007496485020965338, "memory(GiB)": 112.53, "reward": 1.854166716337204, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.8541666865348816, "rewards/CineAccuracyORM/std": 0.27512967213988304, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1023, "train_speed(iter/s)": 0.029641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.25, "completions/mean_length": 75.01041984558105, "completions/min_length": 50.0, "epoch": 1.5256887565152644, "grad_norm": 0.979366118721097, "kl": 0.43994140625, "learning_rate": 9.497489278952055e-07, "loss": -0.0035388402175158262, "memory(GiB)": 112.53, "reward": 1.5520834028720856, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.5520833507180214, "rewards/CineAccuracyORM/std": 0.4884019047021866, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1024, "train_speed(iter/s)": 0.029637 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.5, "completions/mean_length": 72.41666984558105, "completions/min_length": 50.25, "epoch": 1.5271779597915116, "grad_norm": 1.1201986038702287, "kl": 0.41796875, "learning_rate": 9.496455471993282e-07, "loss": -0.0011952733621001244, "memory(GiB)": 112.53, "reward": 1.5208333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.5208333432674408, "rewards/CineAccuracyORM/std": 0.4717390313744545, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1025, "train_speed(iter/s)": 0.02964 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.25, "completions/mean_length": 74.86458587646484, "completions/min_length": 51.75, "epoch": 1.5286671630677588, "grad_norm": 0.005002950442716464, "kl": 0.4296875, "learning_rate": 9.495420659094012e-07, "loss": 0.0004296456172596663, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1026, "train_speed(iter/s)": 0.029635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 106.0, "completions/mean_length": 74.08333587646484, "completions/min_length": 49.75, "epoch": 1.530156366344006, "grad_norm": 0.005247604301721182, "kl": 0.44287109375, "learning_rate": 9.494384840485755e-07, "loss": 0.00044273020466789603, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1027, "train_speed(iter/s)": 0.029637 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.5, "completions/mean_length": 77.15625190734863, "completions/min_length": 50.0, "epoch": 1.5316455696202531, "grad_norm": 0.8083263658410688, "kl": 0.38232421875, "learning_rate": 9.493348016400239e-07, "loss": 0.0019950023852288723, "memory(GiB)": 112.53, "reward": 1.5937500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500149011612, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1028, "train_speed(iter/s)": 0.029648 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 114.0, "completions/mean_length": 78.25000381469727, "completions/min_length": 48.75, "epoch": 1.5331347728965004, "grad_norm": 1.3848484098992202, "kl": 0.3955078125, "learning_rate": 9.492310187069423e-07, "loss": 0.005491288378834724, "memory(GiB)": 112.53, "reward": 1.822916716337204, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.30704472959041595, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1029, "train_speed(iter/s)": 0.02965 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.25, "completions/mean_length": 76.72916984558105, "completions/min_length": 51.0, "epoch": 1.5346239761727476, "grad_norm": 0.005680057759339906, "kl": 0.4111328125, "learning_rate": 9.49127135272549e-07, "loss": 0.0004112573224119842, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1030, "train_speed(iter/s)": 0.029652 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.75, "completions/mean_length": 77.07291793823242, "completions/min_length": 50.75, "epoch": 1.5361131794489948, "grad_norm": 0.004785074153289132, "kl": 0.4130859375, "learning_rate": 9.490231513600841e-07, "loss": 0.00041238972335122526, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1031, "train_speed(iter/s)": 0.029654 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 112.0, "completions/mean_length": 78.73958778381348, "completions/min_length": 52.25, "epoch": 1.5376023827252419, "grad_norm": 1.3792360218628579, "kl": 0.37158203125, "learning_rate": 9.489190669928115e-07, "loss": 0.0008272792911157012, "memory(GiB)": 112.53, "reward": 1.7291666865348816, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.729166679084301, "rewards/CineAccuracyORM/std": 0.21978919208049774, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1032, "train_speed(iter/s)": 0.029656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.25, "completions/mean_length": 76.05208587646484, "completions/min_length": 48.75, "epoch": 1.539091586001489, "grad_norm": 1.664098280828207, "kl": 0.392578125, "learning_rate": 9.488148821940163e-07, "loss": -0.015893330797553062, "memory(GiB)": 112.53, "reward": 1.5312500298023224, "reward_std": 0.07400632463395596, "rewards/CineAccuracyORM/mean": 0.5312500037252903, "rewards/CineAccuracyORM/std": 0.33919306844472885, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1033, "train_speed(iter/s)": 0.029657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 113.0, "completions/mean_length": 72.63541793823242, "completions/min_length": 43.75, "epoch": 1.5405807892777363, "grad_norm": 0.005004987072641869, "kl": 0.4091796875, "learning_rate": 9.487105969870066e-07, "loss": 0.00040955061558634043, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1034, "train_speed(iter/s)": 0.02966 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.75, "completions/mean_length": 76.14583587646484, "completions/min_length": 53.0, "epoch": 1.5420699925539836, "grad_norm": 0.9956545896638214, "kl": 0.41943359375, "learning_rate": 9.48606211395113e-07, "loss": -0.0031449696980416775, "memory(GiB)": 112.53, "reward": 1.6770833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833432674408, "rewards/CineAccuracyORM/std": 0.11607640981674194, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1035, "train_speed(iter/s)": 0.029656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.25, "completions/mean_length": 76.43750190734863, "completions/min_length": 52.75, "epoch": 1.5435591958302308, "grad_norm": 1.3927031672065442, "kl": 0.408203125, "learning_rate": 9.485017254416887e-07, "loss": 0.011636081151664257, "memory(GiB)": 112.53, "reward": 1.572916716337204, "reward_std": 0.10661446116864681, "rewards/CineAccuracyORM/mean": 0.5729166939854622, "rewards/CineAccuracyORM/std": 0.4232219569385052, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1036, "train_speed(iter/s)": 0.02965 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 114.0, "completions/mean_length": 76.87500190734863, "completions/min_length": 50.5, "epoch": 1.545048399106478, "grad_norm": 1.2084834600222738, "kl": 0.41943359375, "learning_rate": 9.483971391501088e-07, "loss": 0.012560470961034298, "memory(GiB)": 112.53, "reward": 1.604166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.604166679084301, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1037, "train_speed(iter/s)": 0.029661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.0, "completions/mean_length": 75.33333778381348, "completions/min_length": 49.25, "epoch": 1.5465376023827253, "grad_norm": 1.031522004834654, "kl": 0.41015625, "learning_rate": 9.482924525437715e-07, "loss": 0.004027306567877531, "memory(GiB)": 112.53, "reward": 1.6354166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6354166716337204, "rewards/CineAccuracyORM/std": 0.20484384894371033, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1038, "train_speed(iter/s)": 0.02966 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.25, "completions/mean_length": 75.16666984558105, "completions/min_length": 48.25, "epoch": 1.5480268056589725, "grad_norm": 0.8036315608512827, "kl": 0.43017578125, "learning_rate": 9.481876656460967e-07, "loss": 0.0030545962508767843, "memory(GiB)": 112.53, "reward": 1.5937500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500149011612, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1039, "train_speed(iter/s)": 0.029637 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 104.25, "completions/mean_length": 72.98958587646484, "completions/min_length": 47.0, "epoch": 1.5495160089352198, "grad_norm": 1.9575861733919653, "kl": 0.40673828125, "learning_rate": 9.480827784805278e-07, "loss": 0.00930616445839405, "memory(GiB)": 112.53, "reward": 1.729166716337204, "reward_std": 0.16937757655978203, "rewards/CineAccuracyORM/mean": 0.7291666865348816, "rewards/CineAccuracyORM/std": 0.40924668312072754, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1040, "train_speed(iter/s)": 0.029637 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 105.0, "completions/mean_length": 74.57291984558105, "completions/min_length": 51.75, "epoch": 1.5510052122114668, "grad_norm": 1.6755064770986654, "kl": 0.427734375, "learning_rate": 9.479777910705296e-07, "loss": -0.007838904857635498, "memory(GiB)": 112.53, "reward": 1.510416716337204, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.5104166846722364, "rewards/CineAccuracyORM/std": 0.36238520964980125, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1041, "train_speed(iter/s)": 0.02963 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.75, "completions/mean_length": 70.92708492279053, "completions/min_length": 49.5, "epoch": 1.552494415487714, "grad_norm": 1.2511278697592105, "kl": 0.42822265625, "learning_rate": 9.478727034395897e-07, "loss": 0.0009661701042205095, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1042, "train_speed(iter/s)": 0.029634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.0, "completions/mean_length": 70.48958587646484, "completions/min_length": 45.0, "epoch": 1.5539836187639613, "grad_norm": 0.8425946377629708, "kl": 0.4130859375, "learning_rate": 9.477675156112182e-07, "loss": 0.005194051191210747, "memory(GiB)": 112.53, "reward": 1.7708333432674408, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333358168602, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1043, "train_speed(iter/s)": 0.029628 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 107.0, "completions/mean_length": 70.88541793823242, "completions/min_length": 47.5, "epoch": 1.5554728220402085, "grad_norm": 0.005618309415007108, "kl": 0.427734375, "learning_rate": 9.476622276089477e-07, "loss": 0.000427750579547137, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1044, "train_speed(iter/s)": 0.029629 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 101.25, "completions/mean_length": 67.06250095367432, "completions/min_length": 48.25, "epoch": 1.5569620253164556, "grad_norm": 2.4451873626772485, "kl": 0.47705078125, "learning_rate": 9.475568394563329e-07, "loss": 0.0027944082394242287, "memory(GiB)": 112.53, "reward": 1.7083333730697632, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.7083333432674408, "rewards/CineAccuracyORM/std": 0.44886354357004166, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1045, "train_speed(iter/s)": 0.029624 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 99.0, "completions/mean_length": 64.80208492279053, "completions/min_length": 40.25, "epoch": 1.5584512285927028, "grad_norm": 1.4394953383336662, "kl": 0.45849609375, "learning_rate": 9.474513511769513e-07, "loss": 0.0021909999195486307, "memory(GiB)": 112.53, "reward": 1.4270834028720856, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.4270833460614085, "rewards/CineAccuracyORM/std": 0.4121886007487774, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1046, "train_speed(iter/s)": 0.029613 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.5, "completions/mean_length": 71.32291793823242, "completions/min_length": 48.25, "epoch": 1.55994043186895, "grad_norm": 1.4682802048407138, "kl": 0.41845703125, "learning_rate": 9.473457627944026e-07, "loss": 0.005978059023618698, "memory(GiB)": 112.53, "reward": 1.8125000298023224, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.2950507290661335, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1047, "train_speed(iter/s)": 0.029624 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.75, "completions/mean_length": 71.19791984558105, "completions/min_length": 44.25, "epoch": 1.5614296351451973, "grad_norm": 0.005767492848504828, "kl": 0.41748046875, "learning_rate": 9.472400743323086e-07, "loss": 0.00041806913213804364, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1048, "train_speed(iter/s)": 0.029619 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 93.0, "completions/mean_length": 68.14583396911621, "completions/min_length": 46.25, "epoch": 1.5629188384214445, "grad_norm": 1.2410791363265512, "kl": 0.41796875, "learning_rate": 9.471342858143138e-07, "loss": 0.007012970745563507, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.5833333535119891, "rewards/CineAccuracyORM/std": 0.3428337797522545, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1049, "train_speed(iter/s)": 0.029622 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 104.75, "completions/mean_length": 71.40625190734863, "completions/min_length": 48.5, "epoch": 1.5644080416976918, "grad_norm": 0.709348510756695, "kl": 0.44482421875, "learning_rate": 9.470283972640854e-07, "loss": 0.004099332261830568, "memory(GiB)": 112.53, "reward": 1.510416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5104166744276881, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1050, "train_speed(iter/s)": 0.029622 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.25, "completions/mean_length": 66.94791889190674, "completions/min_length": 41.75, "epoch": 1.565897244973939, "grad_norm": 0.005917398483740525, "kl": 0.43994140625, "learning_rate": 9.469224087053122e-07, "loss": 0.00043928236118517816, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1051, "train_speed(iter/s)": 0.029625 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 113.0, "completions/mean_length": 69.73958683013916, "completions/min_length": 44.75, "epoch": 1.5673864482501862, "grad_norm": 3.4339546333556923, "kl": 0.416015625, "learning_rate": 9.468163201617061e-07, "loss": 0.0037992983125150204, "memory(GiB)": 112.53, "reward": 1.6145834028720856, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6145833507180214, "rewards/CineAccuracyORM/std": 0.464870348572731, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1052, "train_speed(iter/s)": 0.029636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 98.75, "completions/mean_length": 65.21875190734863, "completions/min_length": 45.25, "epoch": 1.5688756515264335, "grad_norm": 1.1387302839146791, "kl": 0.50390625, "learning_rate": 9.46710131657001e-07, "loss": -0.003121759742498398, "memory(GiB)": 112.53, "reward": 1.5937500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500149011612, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1053, "train_speed(iter/s)": 0.029625 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 116.0, "completions/mean_length": 66.83333587646484, "completions/min_length": 47.25, "epoch": 1.5703648548026807, "grad_norm": 1.0525380441694843, "kl": 0.44580078125, "learning_rate": 9.466038432149533e-07, "loss": -0.006553172133862972, "memory(GiB)": 112.53, "reward": 1.5625000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.5625000223517418, "rewards/CineAccuracyORM/std": 0.4870600998401642, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1054, "train_speed(iter/s)": 0.029628 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 106.5, "completions/mean_length": 64.85416984558105, "completions/min_length": 42.25, "epoch": 1.5718540580789278, "grad_norm": 1.6584957213144456, "kl": 0.455078125, "learning_rate": 9.464974548593414e-07, "loss": 0.00044392794370651245, "memory(GiB)": 112.53, "reward": 1.6250000596046448, "reward_std": 0.11116771958768368, "rewards/CineAccuracyORM/mean": 0.6250000298023224, "rewards/CineAccuracyORM/std": 0.48960913717746735, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1055, "train_speed(iter/s)": 0.029618 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 106.25, "completions/mean_length": 66.37500190734863, "completions/min_length": 44.0, "epoch": 1.573343261355175, "grad_norm": 0.9039050863461848, "kl": 0.4482421875, "learning_rate": 9.463909666139666e-07, "loss": 0.004723513964563608, "memory(GiB)": 112.53, "reward": 1.572916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.572916679084301, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1056, "train_speed(iter/s)": 0.029621 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.25, "completions/mean_length": 66.98958683013916, "completions/min_length": 44.75, "epoch": 1.5748324646314222, "grad_norm": 0.006342802767374596, "kl": 0.44287109375, "learning_rate": 9.462843785026525e-07, "loss": 0.00044290287769399583, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1057, "train_speed(iter/s)": 0.029624 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.75, "completions/mean_length": 66.71875095367432, "completions/min_length": 44.5, "epoch": 1.5763216679076693, "grad_norm": 0.005305820179412628, "kl": 0.46240234375, "learning_rate": 9.461776905492444e-07, "loss": 0.000462115160189569, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1058, "train_speed(iter/s)": 0.02962 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 102.0, "completions/mean_length": 64.98958492279053, "completions/min_length": 43.25, "epoch": 1.5778108711839165, "grad_norm": 3.12105760832656, "kl": 1.19580078125, "learning_rate": 9.460709027776106e-07, "loss": 0.00022420991444960237, "memory(GiB)": 112.53, "reward": 1.8958333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1059, "train_speed(iter/s)": 0.029626 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 94.0, "completions/mean_length": 65.33333492279053, "completions/min_length": 41.5, "epoch": 1.5793000744601637, "grad_norm": 1.436740809346599, "kl": 0.44775390625, "learning_rate": 9.459640152116416e-07, "loss": 0.0009157781023532152, "memory(GiB)": 112.53, "reward": 1.5312500596046448, "reward_std": 0.1322161816060543, "rewards/CineAccuracyORM/mean": 0.5312500149011612, "rewards/CineAccuracyORM/std": 0.464870348572731, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1060, "train_speed(iter/s)": 0.029638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 91.75, "completions/mean_length": 63.43750190734863, "completions/min_length": 42.0, "epoch": 1.580789277736411, "grad_norm": 2.3136486541221073, "kl": 0.4814453125, "learning_rate": 9.458570278752499e-07, "loss": 0.01460857130587101, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4154365845024586, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1061, "train_speed(iter/s)": 0.029634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 113.25, "completions/mean_length": 68.04166984558105, "completions/min_length": 40.75, "epoch": 1.5822784810126582, "grad_norm": 1.7556998165144515, "kl": 0.48828125, "learning_rate": 9.457499407923708e-07, "loss": -0.006696376949548721, "memory(GiB)": 112.53, "reward": 1.5937500298023224, "reward_std": 0.08838834799826145, "rewards/CineAccuracyORM/mean": 0.5937500074505806, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1062, "train_speed(iter/s)": 0.029629 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.25, "completions/mean_length": 65.42708587646484, "completions/min_length": 43.5, "epoch": 1.5837676842889055, "grad_norm": 1.6126191140438966, "kl": 0.4345703125, "learning_rate": 9.456427539869614e-07, "loss": -0.007337446324527264, "memory(GiB)": 112.53, "reward": 1.5625000298023224, "reward_std": 0.09750141017138958, "rewards/CineAccuracyORM/mean": 0.5625000074505806, "rewards/CineAccuracyORM/std": 0.3600961044430733, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1063, "train_speed(iter/s)": 0.029633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 92.5, "completions/mean_length": 64.21875095367432, "completions/min_length": 43.5, "epoch": 1.5852568875651527, "grad_norm": 0.007105928182768245, "kl": 0.4755859375, "learning_rate": 9.455354674830015e-07, "loss": 0.00047596864169463515, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1064, "train_speed(iter/s)": 0.02962 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.75, "completions/mean_length": 66.42708587646484, "completions/min_length": 44.5, "epoch": 1.5867460908414, "grad_norm": 1.0079235491565905, "kl": 0.44677734375, "learning_rate": 9.45428081304493e-07, "loss": 0.003981039859354496, "memory(GiB)": 112.53, "reward": 1.5937500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500149011612, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1065, "train_speed(iter/s)": 0.029624 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 96.0, "completions/mean_length": 65.05208492279053, "completions/min_length": 43.25, "epoch": 1.5882352941176472, "grad_norm": 1.1899081097157336, "kl": 0.46337890625, "learning_rate": 9.453205954754605e-07, "loss": 0.00712896604090929, "memory(GiB)": 112.53, "reward": 1.8645833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8645833432674408, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1066, "train_speed(iter/s)": 0.02962 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 104.5, "completions/mean_length": 65.72916889190674, "completions/min_length": 43.25, "epoch": 1.5897244973938944, "grad_norm": 0.0058984718403585195, "kl": 0.47021484375, "learning_rate": 9.452130100199502e-07, "loss": 0.00047023198567330837, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1067, "train_speed(iter/s)": 0.029622 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.0, "completions/mean_length": 66.51041984558105, "completions/min_length": 44.75, "epoch": 1.5912137006701415, "grad_norm": 0.7805506446828849, "kl": 0.43701171875, "learning_rate": 9.451053249620312e-07, "loss": 0.0024177562445402145, "memory(GiB)": 112.53, "reward": 1.5937500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500223517418, "rewards/CineAccuracyORM/std": 0.47723397612571716, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1068, "train_speed(iter/s)": 0.029625 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 92.75, "completions/mean_length": 63.91666889190674, "completions/min_length": 42.75, "epoch": 1.5927029039463887, "grad_norm": 1.1952725605631986, "kl": 0.4638671875, "learning_rate": 9.449975403257945e-07, "loss": 0.002430124208331108, "memory(GiB)": 112.53, "reward": 1.8437500298023224, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.8437500074505806, "rewards/CineAccuracyORM/std": 0.1978268027305603, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1069, "train_speed(iter/s)": 0.029626 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.5, "completions/mean_length": 67.54166793823242, "completions/min_length": 42.25, "epoch": 1.594192107222636, "grad_norm": 1.0503438744150742, "kl": 0.44970703125, "learning_rate": 9.448896561353535e-07, "loss": 0.005093843210488558, "memory(GiB)": 112.53, "reward": 1.6250000298023224, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.6250000037252903, "rewards/CineAccuracyORM/std": 0.2155592292547226, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1070, "train_speed(iter/s)": 0.029621 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 92.75, "completions/mean_length": 62.92708492279053, "completions/min_length": 42.0, "epoch": 1.595681310498883, "grad_norm": 1.223429600652411, "kl": 0.48046875, "learning_rate": 9.447816724148441e-07, "loss": 0.006290622055530548, "memory(GiB)": 112.53, "reward": 1.5520833730697632, "reward_std": 0.07400632463395596, "rewards/CineAccuracyORM/mean": 0.5520833432674408, "rewards/CineAccuracyORM/std": 0.4920940324664116, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1071, "train_speed(iter/s)": 0.029608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 98.5, "completions/mean_length": 64.39583587646484, "completions/min_length": 40.75, "epoch": 1.5971705137751302, "grad_norm": 1.7880281097313515, "kl": 0.4677734375, "learning_rate": 9.446735891884241e-07, "loss": 0.006578392349183559, "memory(GiB)": 112.53, "reward": 1.7291666865348816, "reward_std": 0.11116771958768368, "rewards/CineAccuracyORM/mean": 0.7291666716337204, "rewards/CineAccuracyORM/std": 0.3614596426486969, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1072, "train_speed(iter/s)": 0.029619 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 106.5, "completions/mean_length": 63.55208492279053, "completions/min_length": 40.5, "epoch": 1.5986597170513774, "grad_norm": 0.006067814133919436, "kl": 0.47998046875, "learning_rate": 9.445654064802737e-07, "loss": 0.00047993811313062906, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1073, "train_speed(iter/s)": 0.02963 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 97.25, "completions/mean_length": 64.36458492279053, "completions/min_length": 40.25, "epoch": 1.6001489203276247, "grad_norm": 0.006849419644138923, "kl": 0.46435546875, "learning_rate": 9.444571243145954e-07, "loss": 0.0004649049951694906, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1074, "train_speed(iter/s)": 0.029635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.25, "completions/mean_length": 65.13541793823242, "completions/min_length": 40.75, "epoch": 1.601638123603872, "grad_norm": 0.8302343809363742, "kl": 0.47412109375, "learning_rate": 9.443487427156141e-07, "loss": 0.01093091070652008, "memory(GiB)": 112.53, "reward": 1.791666716337204, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7916666865348816, "rewards/CineAccuracyORM/std": 0.33594508469104767, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1075, "train_speed(iter/s)": 0.029646 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 113.0, "completions/mean_length": 66.17708587646484, "completions/min_length": 41.0, "epoch": 1.6031273268801192, "grad_norm": 1.136768658739129, "kl": 0.490234375, "learning_rate": 9.442402617075764e-07, "loss": 0.006801735144108534, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.07715167850255966, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.3015497848391533, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1076, "train_speed(iter/s)": 0.029641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 95.75, "completions/mean_length": 62.968750953674316, "completions/min_length": 38.25, "epoch": 1.6046165301563664, "grad_norm": 1.1418152137736637, "kl": 0.4814453125, "learning_rate": 9.44131681314752e-07, "loss": 0.002788897603750229, "memory(GiB)": 112.53, "reward": 1.7291666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.729166679084301, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1077, "train_speed(iter/s)": 0.02963 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 106.25, "completions/mean_length": 65.20833396911621, "completions/min_length": 38.25, "epoch": 1.6061057334326136, "grad_norm": 0.9520163082503582, "kl": 0.48876953125, "learning_rate": 9.440230015614318e-07, "loss": -0.006864070892333984, "memory(GiB)": 112.53, "reward": 1.6250000298023224, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.6250000074505806, "rewards/CineAccuracyORM/std": 0.36846019327640533, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1078, "train_speed(iter/s)": 0.029633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 90.75, "completions/mean_length": 61.25000190734863, "completions/min_length": 40.75, "epoch": 1.6075949367088609, "grad_norm": 1.1930460699117746, "kl": 0.53125, "learning_rate": 9.439142224719301e-07, "loss": -0.0007226617890410125, "memory(GiB)": 112.53, "reward": 1.6354166865348816, "reward_std": 0.07400632463395596, "rewards/CineAccuracyORM/mean": 0.6354166716337204, "rewards/CineAccuracyORM/std": 0.3717081770300865, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1079, "train_speed(iter/s)": 0.029628 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 116.25, "completions/mean_length": 62.47916889190674, "completions/min_length": 38.0, "epoch": 1.6090841399851081, "grad_norm": 0.9863584798595927, "kl": 0.47509765625, "learning_rate": 9.438053440705822e-07, "loss": 0.005359190981835127, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1080, "train_speed(iter/s)": 0.029638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 86.0, "completions/mean_length": 60.05208492279053, "completions/min_length": 39.0, "epoch": 1.6105733432613552, "grad_norm": 0.778049100900624, "kl": 0.49853515625, "learning_rate": 9.436963663817466e-07, "loss": -0.0023018550127744675, "memory(GiB)": 112.53, "reward": 1.5937500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500102445483, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1081, "train_speed(iter/s)": 0.029642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 90.25, "completions/mean_length": 61.60416793823242, "completions/min_length": 43.0, "epoch": 1.6120625465376024, "grad_norm": 1.2077398223053974, "kl": 0.515625, "learning_rate": 9.435872894298036e-07, "loss": 0.003168639726936817, "memory(GiB)": 112.53, "reward": 1.8125000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1082, "train_speed(iter/s)": 0.029653 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 101.75, "completions/mean_length": 62.22916793823242, "completions/min_length": 40.5, "epoch": 1.6135517498138496, "grad_norm": 0.007160750615547885, "kl": 0.505859375, "learning_rate": 9.434781132391556e-07, "loss": 0.000505293719470501, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1083, "train_speed(iter/s)": 0.029642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 99.25, "completions/mean_length": 63.677085876464844, "completions/min_length": 39.25, "epoch": 1.6150409530900967, "grad_norm": 1.4452872653565787, "kl": 0.478515625, "learning_rate": 9.433688378342273e-07, "loss": 0.005229928530752659, "memory(GiB)": 112.53, "reward": 1.5520834028720856, "reward_std": 0.10661446675658226, "rewards/CineAccuracyORM/mean": 0.5520833507180214, "rewards/CineAccuracyORM/std": 0.4939185827970505, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1084, "train_speed(iter/s)": 0.029646 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 87.25, "completions/mean_length": 61.04166793823242, "completions/min_length": 34.25, "epoch": 1.616530156366344, "grad_norm": 1.3907202022373468, "kl": 0.486328125, "learning_rate": 9.432594632394659e-07, "loss": -0.00980998296290636, "memory(GiB)": 112.53, "reward": 1.6875000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000149011612, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1085, "train_speed(iter/s)": 0.029643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 106.25, "completions/mean_length": 64.67708492279053, "completions/min_length": 40.75, "epoch": 1.6180193596425911, "grad_norm": 0.8972952426098427, "kl": 0.47509765625, "learning_rate": 9.431499894793402e-07, "loss": -0.011648349463939667, "memory(GiB)": 112.53, "reward": 1.7187500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500149011612, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1086, "train_speed(iter/s)": 0.029638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 98.5, "completions/mean_length": 61.16666793823242, "completions/min_length": 38.75, "epoch": 1.6195085629188384, "grad_norm": 0.9653972915944997, "kl": 0.48974609375, "learning_rate": 9.430404165783418e-07, "loss": 0.006266753189265728, "memory(GiB)": 112.53, "reward": 1.697916716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.697916679084301, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1087, "train_speed(iter/s)": 0.029642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 102.0, "completions/mean_length": 63.15625286102295, "completions/min_length": 36.75, "epoch": 1.6209977661950856, "grad_norm": 0.00763700621119196, "kl": 0.45751953125, "learning_rate": 9.42930744560984e-07, "loss": 0.00045785479596816003, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1088, "train_speed(iter/s)": 0.029652 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 97.75, "completions/mean_length": 65.12500381469727, "completions/min_length": 43.25, "epoch": 1.6224869694713329, "grad_norm": 1.4467137542829245, "kl": 0.486328125, "learning_rate": 9.428209734518025e-07, "loss": -0.01036390382796526, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.1171354167163372, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.3085566312074661, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1089, "train_speed(iter/s)": 0.029655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 101.5, "completions/mean_length": 65.37500190734863, "completions/min_length": 44.75, "epoch": 1.62397617274758, "grad_norm": 0.8571114272649515, "kl": 0.4892578125, "learning_rate": 9.427111032753552e-07, "loss": 0.003975715022534132, "memory(GiB)": 112.53, "reward": 1.6250000298023224, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.6250000149011612, "rewards/CineAccuracyORM/std": 0.33594508469104767, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1090, "train_speed(iter/s)": 0.029659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 104.0, "completions/mean_length": 63.885416984558105, "completions/min_length": 43.25, "epoch": 1.6254653760238273, "grad_norm": 1.5310522769031363, "kl": 0.4892578125, "learning_rate": 9.42601134056222e-07, "loss": 0.001787230372428894, "memory(GiB)": 112.53, "reward": 1.5937500596046448, "reward_std": 0.0876726396381855, "rewards/CineAccuracyORM/mean": 0.5937500223517418, "rewards/CineAccuracyORM/std": 0.43965786695480347, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1091, "train_speed(iter/s)": 0.02967 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.75, "completions/mean_length": 66.58333396911621, "completions/min_length": 39.5, "epoch": 1.6269545793000746, "grad_norm": 1.8022282598276471, "kl": 0.48681640625, "learning_rate": 9.42491065819005e-07, "loss": -0.012183384969830513, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.1451837606728077, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.35253867506980896, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1092, "train_speed(iter/s)": 0.029673 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.5, "completions/mean_length": 67.63541889190674, "completions/min_length": 38.0, "epoch": 1.6284437825763218, "grad_norm": 1.4761307886861579, "kl": 0.44189453125, "learning_rate": 9.423808985883288e-07, "loss": -0.00022412091493606567, "memory(GiB)": 112.53, "reward": 1.7604166865348816, "reward_std": 0.10661446675658226, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.3546011596918106, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1093, "train_speed(iter/s)": 0.029683 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 85.25, "completions/mean_length": 62.85416793823242, "completions/min_length": 41.75, "epoch": 1.6299329858525688, "grad_norm": 1.1870248462522048, "kl": 0.4912109375, "learning_rate": 9.422706323888396e-07, "loss": -0.019398964941501617, "memory(GiB)": 112.53, "reward": 1.6770834028720856, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.677083358168602, "rewards/CineAccuracyORM/std": 0.4703870266675949, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1094, "train_speed(iter/s)": 0.029687 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 97.5, "completions/mean_length": 61.87500286102295, "completions/min_length": 37.75, "epoch": 1.631422189128816, "grad_norm": 0.7906392499887868, "kl": 0.5068359375, "learning_rate": 9.421602672452061e-07, "loss": -0.0006870391662232578, "memory(GiB)": 112.53, "reward": 1.479166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.4791666865348816, "rewards/CineAccuracyORM/std": 0.4870600998401642, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1095, "train_speed(iter/s)": 0.029682 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 100.0, "completions/mean_length": 64.46875190734863, "completions/min_length": 41.5, "epoch": 1.6329113924050633, "grad_norm": 1.3861406995952867, "kl": 0.474609375, "learning_rate": 9.420498031821189e-07, "loss": 0.007439611479640007, "memory(GiB)": 112.53, "reward": 1.9583333432674408, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.9583333432674408, "rewards/CineAccuracyORM/std": 0.13548902794718742, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1096, "train_speed(iter/s)": 0.029692 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.75, "completions/mean_length": 65.58333587646484, "completions/min_length": 45.5, "epoch": 1.6344005956813104, "grad_norm": 1.9730595982219454, "kl": 0.47265625, "learning_rate": 9.419392402242911e-07, "loss": -0.00043496800935827196, "memory(GiB)": 112.53, "reward": 1.791666716337204, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.7916666865348816, "rewards/CineAccuracyORM/std": 0.3401750475168228, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1097, "train_speed(iter/s)": 0.029703 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.75, "completions/mean_length": 68.06250190734863, "completions/min_length": 40.5, "epoch": 1.6358897989575576, "grad_norm": 0.006106315900563864, "kl": 0.47119140625, "learning_rate": 9.418285783964574e-07, "loss": 0.0004714453243650496, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1098, "train_speed(iter/s)": 0.029706 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 96.0, "completions/mean_length": 64.05208492279053, "completions/min_length": 40.0, "epoch": 1.6373790022338048, "grad_norm": 1.112834453614805, "kl": 0.4580078125, "learning_rate": 9.417178177233751e-07, "loss": 0.011208845302462578, "memory(GiB)": 112.53, "reward": 1.5520833730697632, "reward_std": 0.07400632463395596, "rewards/CineAccuracyORM/mean": 0.5520833507180214, "rewards/CineAccuracyORM/std": 0.4595789238810539, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1099, "train_speed(iter/s)": 0.029716 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 85.75, "completions/mean_length": 60.56250190734863, "completions/min_length": 40.75, "epoch": 1.638868205510052, "grad_norm": 0.006679324527461192, "kl": 0.45263671875, "learning_rate": 9.416069582298234e-07, "loss": 0.0004526518168859184, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1100, "train_speed(iter/s)": 0.029728 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.5, "completions/mean_length": 67.32291984558105, "completions/min_length": 45.75, "epoch": 1.6403574087862993, "grad_norm": 0.7939082765658918, "kl": 0.45458984375, "learning_rate": 9.414959999406036e-07, "loss": -0.008369444869458675, "memory(GiB)": 112.53, "reward": 1.4270834028720856, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.4270833460614085, "rewards/CineAccuracyORM/std": 0.4121886007487774, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1101, "train_speed(iter/s)": 0.029725 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 96.5, "completions/mean_length": 67.26041984558105, "completions/min_length": 45.25, "epoch": 1.6418466120625466, "grad_norm": 0.00588996137198769, "kl": 0.4501953125, "learning_rate": 9.413849428805389e-07, "loss": 0.00044942181557416916, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1102, "train_speed(iter/s)": 0.029722 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 104.0, "completions/mean_length": 64.42708587646484, "completions/min_length": 41.5, "epoch": 1.6433358153387938, "grad_norm": 1.6447978739263955, "kl": 0.453125, "learning_rate": 9.412737870744751e-07, "loss": 0.0020942953415215015, "memory(GiB)": 112.53, "reward": 1.6250000298023224, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.6250000074505806, "rewards/CineAccuracyORM/std": 0.36846019327640533, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1103, "train_speed(iter/s)": 0.029732 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 107.25, "completions/mean_length": 66.87500190734863, "completions/min_length": 42.25, "epoch": 1.644825018615041, "grad_norm": 1.56773852764629, "kl": 0.47900390625, "learning_rate": 9.411625325472799e-07, "loss": -0.006097673438489437, "memory(GiB)": 112.53, "reward": 1.635416716337204, "reward_std": 0.10205466859042645, "rewards/CineAccuracyORM/mean": 0.6354166865348816, "rewards/CineAccuracyORM/std": 0.4595789238810539, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1104, "train_speed(iter/s)": 0.029729 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 105.0, "completions/mean_length": 71.58333587646484, "completions/min_length": 42.25, "epoch": 1.6463142218912883, "grad_norm": 0.7100227524792891, "kl": 0.44873046875, "learning_rate": 9.410511793238427e-07, "loss": -0.004805113188922405, "memory(GiB)": 112.53, "reward": 1.5729166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5729166716337204, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1105, "train_speed(iter/s)": 0.029724 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 102.25, "completions/mean_length": 70.92708587646484, "completions/min_length": 43.0, "epoch": 1.6478034251675355, "grad_norm": 1.1706799649662485, "kl": 0.43994140625, "learning_rate": 9.409397274290755e-07, "loss": 0.019473418593406677, "memory(GiB)": 112.53, "reward": 1.5937500298023224, "reward_std": 0.06803862750530243, "rewards/CineAccuracyORM/mean": 0.5937500149011612, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1106, "train_speed(iter/s)": 0.02972 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 97.5, "completions/mean_length": 64.6979169845581, "completions/min_length": 46.75, "epoch": 1.6492926284437825, "grad_norm": 1.4969410946425106, "kl": 0.4833984375, "learning_rate": 9.408281768879121e-07, "loss": 0.003534941002726555, "memory(GiB)": 112.53, "reward": 1.4687500596046448, "reward_std": 0.10205466859042645, "rewards/CineAccuracyORM/mean": 0.468750006519258, "rewards/CineAccuracyORM/std": 0.26659026369452477, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1107, "train_speed(iter/s)": 0.029714 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.25, "completions/mean_length": 68.36458587646484, "completions/min_length": 41.75, "epoch": 1.6507818317200298, "grad_norm": 0.909639547881646, "kl": 0.47265625, "learning_rate": 9.407165277253084e-07, "loss": 0.0070008388720452785, "memory(GiB)": 112.53, "reward": 1.6875000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000149011612, "rewards/CineAccuracyORM/std": 0.4717390313744545, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1108, "train_speed(iter/s)": 0.029716 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.75, "completions/mean_length": 65.60416793823242, "completions/min_length": 42.5, "epoch": 1.652271034996277, "grad_norm": 1.1706234054857174, "kl": 0.46337890625, "learning_rate": 9.406047799662425e-07, "loss": 0.012803099118173122, "memory(GiB)": 112.53, "reward": 1.7083334028720856, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.708333358168602, "rewards/CineAccuracyORM/std": 0.4190470837056637, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1109, "train_speed(iter/s)": 0.029716 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.75, "completions/mean_length": 70.28125190734863, "completions/min_length": 43.75, "epoch": 1.653760238272524, "grad_norm": 1.9647893005336636, "kl": 0.43212890625, "learning_rate": 9.404929336357141e-07, "loss": 0.0012476792326197028, "memory(GiB)": 112.53, "reward": 1.5312500596046448, "reward_std": 0.17920634150505066, "rewards/CineAccuracyORM/mean": 0.5312500204890966, "rewards/CineAccuracyORM/std": 0.4205836355686188, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1110, "train_speed(iter/s)": 0.029705 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 92.25, "completions/mean_length": 64.43750190734863, "completions/min_length": 41.75, "epoch": 1.6552494415487713, "grad_norm": 1.0647837382936705, "kl": 0.49462890625, "learning_rate": 9.403809887587457e-07, "loss": -0.0076274266466498375, "memory(GiB)": 112.53, "reward": 1.510416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.510416679084301, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1111, "train_speed(iter/s)": 0.0297 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 97.5, "completions/mean_length": 70.35417175292969, "completions/min_length": 40.25, "epoch": 1.6567386448250185, "grad_norm": 1.3080787099002151, "kl": 0.44677734375, "learning_rate": 9.402689453603814e-07, "loss": -0.00956945400685072, "memory(GiB)": 112.53, "reward": 1.75, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.2472676783800125, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1112, "train_speed(iter/s)": 0.0297 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 114.75, "completions/mean_length": 68.07291984558105, "completions/min_length": 43.5, "epoch": 1.6582278481012658, "grad_norm": 0.005710944489214294, "kl": 0.46435546875, "learning_rate": 9.401568034656871e-07, "loss": 0.0004644209984689951, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1113, "train_speed(iter/s)": 0.029706 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.75, "completions/mean_length": 70.22916889190674, "completions/min_length": 43.5, "epoch": 1.659717051377513, "grad_norm": 1.475512402435842, "kl": 0.43212890625, "learning_rate": 9.400445630997513e-07, "loss": -0.0007448060205206275, "memory(GiB)": 112.53, "reward": 1.6458333730697632, "reward_std": 0.11713542230427265, "rewards/CineAccuracyORM/mean": 0.6458333507180214, "rewards/CineAccuracyORM/std": 0.4263497106730938, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1114, "train_speed(iter/s)": 0.029708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 102.75, "completions/mean_length": 69.66666984558105, "completions/min_length": 45.25, "epoch": 1.6612062546537603, "grad_norm": 1.4382952647025637, "kl": 0.44677734375, "learning_rate": 9.399322242876841e-07, "loss": -0.009186133742332458, "memory(GiB)": 112.53, "reward": 1.7812500298023224, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.7812500149011612, "rewards/CineAccuracyORM/std": 0.3307463824748993, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1115, "train_speed(iter/s)": 0.029709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 108.0, "completions/mean_length": 69.08333396911621, "completions/min_length": 42.25, "epoch": 1.6626954579300075, "grad_norm": 1.6613958929515078, "kl": 0.4482421875, "learning_rate": 9.398197870546178e-07, "loss": -0.015625696629285812, "memory(GiB)": 112.53, "reward": 1.7083333730697632, "reward_std": 0.1420449409633875, "rewards/CineAccuracyORM/mean": 0.7083333507180214, "rewards/CineAccuracyORM/std": 0.4092426933348179, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1116, "train_speed(iter/s)": 0.029712 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.0, "completions/mean_length": 74.35416793823242, "completions/min_length": 48.5, "epoch": 1.6641846612062547, "grad_norm": 0.005659137057286327, "kl": 0.42578125, "learning_rate": 9.397072514257068e-07, "loss": 0.0004250369966030121, "memory(GiB)": 112.53, "reward": 1.416666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.416666679084301, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1117, "train_speed(iter/s)": 0.029706 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.75, "completions/mean_length": 67.34375381469727, "completions/min_length": 43.75, "epoch": 1.665673864482502, "grad_norm": 0.0056387895364156575, "kl": 0.4140625, "learning_rate": 9.395946174261274e-07, "loss": 0.0004134889168199152, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1118, "train_speed(iter/s)": 0.029716 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.0, "completions/mean_length": 72.96875381469727, "completions/min_length": 45.75, "epoch": 1.6671630677587492, "grad_norm": 1.6897843543347633, "kl": 0.44091796875, "learning_rate": 9.394818850810777e-07, "loss": -0.01674286462366581, "memory(GiB)": 112.53, "reward": 1.572916716337204, "reward_std": 0.11258216388523579, "rewards/CineAccuracyORM/mean": 0.5729166865348816, "rewards/CineAccuracyORM/std": 0.4650956019759178, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1119, "train_speed(iter/s)": 0.029719 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 98.5, "completions/mean_length": 69.81250381469727, "completions/min_length": 45.75, "epoch": 1.6686522710349962, "grad_norm": 1.4438390688878109, "kl": 0.4423828125, "learning_rate": 9.393690544157782e-07, "loss": 0.011068690568208694, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.12696419283747673, "rewards/CineAccuracyORM/mean": 0.802083358168602, "rewards/CineAccuracyORM/std": 0.37195129320025444, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1120, "train_speed(iter/s)": 0.029715 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 116.25, "completions/mean_length": 70.14583587646484, "completions/min_length": 47.0, "epoch": 1.6701414743112435, "grad_norm": 1.9193991944984885, "kl": 0.435546875, "learning_rate": 9.392561254554711e-07, "loss": 0.025555705651640892, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.14063050784170628, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.3047977685928345, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1121, "train_speed(iter/s)": 0.02971 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 113.75, "completions/mean_length": 70.79166984558105, "completions/min_length": 45.75, "epoch": 1.6716306775874907, "grad_norm": 0.8250104894326333, "kl": 0.43359375, "learning_rate": 9.39143098225421e-07, "loss": 0.004493199288845062, "memory(GiB)": 112.53, "reward": 1.8125000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1122, "train_speed(iter/s)": 0.02972 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.5, "completions/mean_length": 71.30208778381348, "completions/min_length": 45.5, "epoch": 1.6731198808637378, "grad_norm": 0.006066457936480404, "kl": 0.44189453125, "learning_rate": 9.390299727509137e-07, "loss": 0.0004410591791383922, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1123, "train_speed(iter/s)": 0.029704 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.75, "completions/mean_length": 74.19791793823242, "completions/min_length": 46.5, "epoch": 1.674609084139985, "grad_norm": 1.6736615151141219, "kl": 0.44189453125, "learning_rate": 9.38916749057258e-07, "loss": 0.008098164573311806, "memory(GiB)": 112.53, "reward": 1.729166716337204, "reward_std": 0.14974356442689896, "rewards/CineAccuracyORM/mean": 0.7291666865348816, "rewards/CineAccuracyORM/std": 0.32238753139972687, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1124, "train_speed(iter/s)": 0.029709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.0, "completions/mean_length": 72.21875190734863, "completions/min_length": 41.25, "epoch": 1.6760982874162322, "grad_norm": 1.6789157773820118, "kl": 0.4306640625, "learning_rate": 9.388034271697837e-07, "loss": -0.008301343768835068, "memory(GiB)": 112.53, "reward": 1.697916716337204, "reward_std": 0.1329318843781948, "rewards/CineAccuracyORM/mean": 0.6979166865348816, "rewards/CineAccuracyORM/std": 0.3987635113298893, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1125, "train_speed(iter/s)": 0.02971 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.25, "completions/mean_length": 71.25, "completions/min_length": 41.0, "epoch": 1.6775874906924795, "grad_norm": 1.7152230594165145, "kl": 0.419921875, "learning_rate": 9.386900071138432e-07, "loss": 0.00043698959052562714, "memory(GiB)": 112.53, "reward": 1.8958333432674408, "reward_std": 0.1171354278922081, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.2057548388838768, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1126, "train_speed(iter/s)": 0.029712 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.25, "completions/mean_length": 72.56250381469727, "completions/min_length": 43.5, "epoch": 1.6790766939687267, "grad_norm": 1.4301670670460405, "kl": 0.4287109375, "learning_rate": 9.385764889148107e-07, "loss": 0.0004770242958329618, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.08625819534063339, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.35134297609329224, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1127, "train_speed(iter/s)": 0.029722 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 96.5, "completions/mean_length": 68.60416793823242, "completions/min_length": 43.25, "epoch": 1.680565897244974, "grad_norm": 1.2305455579164395, "kl": 0.4658203125, "learning_rate": 9.384628725980822e-07, "loss": 0.00046539781033061445, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.11116772517561913, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.40968769788742065, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1128, "train_speed(iter/s)": 0.029728 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 112.75, "completions/mean_length": 71.62500381469727, "completions/min_length": 47.25, "epoch": 1.6820551005212212, "grad_norm": 1.51891218092517, "kl": 0.43359375, "learning_rate": 9.383491581890759e-07, "loss": -0.002952208276838064, "memory(GiB)": 112.53, "reward": 1.885416716337204, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.8854166865348816, "rewards/CineAccuracyORM/std": 0.24199935421347618, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1129, "train_speed(iter/s)": 0.029723 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.25, "completions/mean_length": 75.07291793823242, "completions/min_length": 48.25, "epoch": 1.6835443037974684, "grad_norm": 1.80331322836313, "kl": 0.4345703125, "learning_rate": 9.382353457132317e-07, "loss": 0.0001704581081867218, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.11116771958768368, "rewards/CineAccuracyORM/mean": 0.5000000102445483, "rewards/CineAccuracyORM/std": 0.4092426933348179, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1130, "train_speed(iter/s)": 0.029709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 108.0, "completions/mean_length": 72.40625190734863, "completions/min_length": 44.0, "epoch": 1.6850335070737157, "grad_norm": 0.7509726999515935, "kl": 0.42626953125, "learning_rate": 9.381214351960118e-07, "loss": 0.005715790670365095, "memory(GiB)": 112.53, "reward": 1.6875000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000149011612, "rewards/CineAccuracyORM/std": 0.4717390313744545, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1131, "train_speed(iter/s)": 0.029711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.0, "completions/mean_length": 71.14583587646484, "completions/min_length": 45.75, "epoch": 1.686522710349963, "grad_norm": 2.1098019301288256, "kl": 0.4453125, "learning_rate": 9.380074266628998e-07, "loss": -0.011522977612912655, "memory(GiB)": 112.53, "reward": 1.7187500298023224, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.7187500074505806, "rewards/CineAccuracyORM/std": 0.25314687192440033, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1132, "train_speed(iter/s)": 0.029721 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 113.25, "completions/mean_length": 69.47917175292969, "completions/min_length": 44.0, "epoch": 1.68801191362621, "grad_norm": 0.94784765948055, "kl": 0.42919921875, "learning_rate": 9.378933201394018e-07, "loss": -0.0024241593200713396, "memory(GiB)": 112.53, "reward": 1.7812500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7812500074505806, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1133, "train_speed(iter/s)": 0.029731 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 116.0, "completions/mean_length": 73.28125, "completions/min_length": 47.5, "epoch": 1.6895011169024572, "grad_norm": 1.6680256641925548, "kl": 0.44091796875, "learning_rate": 9.377791156510454e-07, "loss": 0.0047445353120565414, "memory(GiB)": 112.53, "reward": 1.5312500596046448, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.5312500149011612, "rewards/CineAccuracyORM/std": 0.4150669574737549, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1134, "train_speed(iter/s)": 0.029732 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 108.0, "completions/mean_length": 69.51041984558105, "completions/min_length": 42.75, "epoch": 1.6909903201787044, "grad_norm": 0.854403262149689, "kl": 0.443359375, "learning_rate": 9.376648132233803e-07, "loss": 0.004799488000571728, "memory(GiB)": 112.53, "reward": 1.7187500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500074505806, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1135, "train_speed(iter/s)": 0.029743 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.75, "completions/mean_length": 72.60416984558105, "completions/min_length": 46.25, "epoch": 1.6924795234549515, "grad_norm": 0.8857430692306255, "kl": 0.43603515625, "learning_rate": 9.375504128819778e-07, "loss": 0.007448127493262291, "memory(GiB)": 112.53, "reward": 1.6041666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6041666716337204, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1136, "train_speed(iter/s)": 0.029737 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.25, "completions/mean_length": 70.51041984558105, "completions/min_length": 45.25, "epoch": 1.6939687267311987, "grad_norm": 1.2216387349182123, "kl": 0.46630859375, "learning_rate": 9.374359146524317e-07, "loss": 0.011416202411055565, "memory(GiB)": 112.53, "reward": 1.4895833730697632, "reward_std": 0.0876726396381855, "rewards/CineAccuracyORM/mean": 0.4895833432674408, "rewards/CineAccuracyORM/std": 0.4957045316696167, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1137, "train_speed(iter/s)": 0.029718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.75, "completions/mean_length": 74.28125, "completions/min_length": 47.25, "epoch": 1.695457930007446, "grad_norm": 1.3487983164708, "kl": 0.43798828125, "learning_rate": 9.373213185603574e-07, "loss": -0.011157052591443062, "memory(GiB)": 112.53, "reward": 1.6145833730697632, "reward_std": 0.06803862750530243, "rewards/CineAccuracyORM/mean": 0.6145833432674408, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1138, "train_speed(iter/s)": 0.02972 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.25, "completions/mean_length": 73.93750190734863, "completions/min_length": 43.0, "epoch": 1.6969471332836932, "grad_norm": 0.6887425562491548, "kl": 0.4267578125, "learning_rate": 9.372066246313921e-07, "loss": 0.002430311404168606, "memory(GiB)": 112.53, "reward": 1.8541666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1139, "train_speed(iter/s)": 0.029713 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.5, "completions/mean_length": 73.30208396911621, "completions/min_length": 45.5, "epoch": 1.6984363365599404, "grad_norm": 0.6419687065112339, "kl": 0.40966796875, "learning_rate": 9.370918328911949e-07, "loss": 0.0006177043542265892, "memory(GiB)": 112.53, "reward": 1.729166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7291666865348816, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1140, "train_speed(iter/s)": 0.029722 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.75, "completions/mean_length": 73.00000190734863, "completions/min_length": 45.0, "epoch": 1.6999255398361877, "grad_norm": 0.005757870942262981, "kl": 0.44970703125, "learning_rate": 9.369769433654469e-07, "loss": 0.000449654005933553, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1141, "train_speed(iter/s)": 0.029722 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.25, "completions/mean_length": 71.89583587646484, "completions/min_length": 45.5, "epoch": 1.701414743112435, "grad_norm": 0.005632693909824986, "kl": 0.4423828125, "learning_rate": 9.36861956079851e-07, "loss": 0.0004423797072377056, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1142, "train_speed(iter/s)": 0.029716 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.75, "completions/mean_length": 67.79166793823242, "completions/min_length": 40.75, "epoch": 1.7029039463886821, "grad_norm": 1.1634297602466737, "kl": 0.45458984375, "learning_rate": 9.367468710601319e-07, "loss": -0.007458231877535582, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.07715167850255966, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4772557094693184, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1143, "train_speed(iter/s)": 0.029705 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.25, "completions/mean_length": 72.46875190734863, "completions/min_length": 44.25, "epoch": 1.7043931496649294, "grad_norm": 1.267906437347807, "kl": 0.4169921875, "learning_rate": 9.366316883320363e-07, "loss": 0.005832139402627945, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1144, "train_speed(iter/s)": 0.029714 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.5, "completions/mean_length": 73.53125381469727, "completions/min_length": 44.0, "epoch": 1.7058823529411766, "grad_norm": 1.6701997448723775, "kl": 0.416015625, "learning_rate": 9.365164079213328e-07, "loss": -0.012472948990762234, "memory(GiB)": 112.53, "reward": 1.6145833730697632, "reward_std": 0.08838834799826145, "rewards/CineAccuracyORM/mean": 0.614583358168602, "rewards/CineAccuracyORM/std": 0.4761725142598152, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1145, "train_speed(iter/s)": 0.029723 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.5, "completions/mean_length": 71.27083587646484, "completions/min_length": 42.75, "epoch": 1.7073715562174236, "grad_norm": 1.7539582665013702, "kl": 0.4453125, "learning_rate": 9.364010298538116e-07, "loss": 0.010545007884502411, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.16340987384319305, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.35134297609329224, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1146, "train_speed(iter/s)": 0.029725 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.5, "completions/mean_length": 72.97916984558105, "completions/min_length": 49.5, "epoch": 1.7088607594936709, "grad_norm": 1.2554471853653881, "kl": 0.41259765625, "learning_rate": 9.362855541552852e-07, "loss": 0.004785744473338127, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.33468010276556015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1147, "train_speed(iter/s)": 0.02972 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.75, "completions/mean_length": 71.85416984558105, "completions/min_length": 41.75, "epoch": 1.7103499627699181, "grad_norm": 0.8698138635516094, "kl": 0.4384765625, "learning_rate": 9.361699808515875e-07, "loss": -0.003835922572761774, "memory(GiB)": 112.53, "reward": 1.5937500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500074505806, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1148, "train_speed(iter/s)": 0.02973 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.75, "completions/mean_length": 74.18750381469727, "completions/min_length": 45.75, "epoch": 1.7118391660461652, "grad_norm": 1.1461859679002167, "kl": 0.43994140625, "learning_rate": 9.360543099685742e-07, "loss": -0.005270620342344046, "memory(GiB)": 112.53, "reward": 1.4895833730697632, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.4895833507180214, "rewards/CineAccuracyORM/std": 0.46742958575487137, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1149, "train_speed(iter/s)": 0.029717 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.25, "completions/mean_length": 74.00000190734863, "completions/min_length": 48.0, "epoch": 1.7133283693224124, "grad_norm": 0.0050323553959404355, "kl": 0.42626953125, "learning_rate": 9.359385415321233e-07, "loss": 0.00042642277549020946, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1150, "train_speed(iter/s)": 0.029713 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.25, "completions/mean_length": 77.34375190734863, "completions/min_length": 46.75, "epoch": 1.7148175725986596, "grad_norm": 1.5261324083854393, "kl": 0.4189453125, "learning_rate": 9.358226755681342e-07, "loss": -0.004992691334336996, "memory(GiB)": 112.53, "reward": 1.729166716337204, "reward_std": 0.1451837606728077, "rewards/CineAccuracyORM/mean": 0.729166679084301, "rewards/CineAccuracyORM/std": 0.35134297609329224, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1151, "train_speed(iter/s)": 0.029714 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 114.5, "completions/mean_length": 74.44791984558105, "completions/min_length": 41.0, "epoch": 1.7163067758749069, "grad_norm": 0.9243827211571819, "kl": 0.41650390625, "learning_rate": 9.357067121025284e-07, "loss": 0.006068405695259571, "memory(GiB)": 112.53, "reward": 1.5937500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500223517418, "rewards/CineAccuracyORM/std": 0.47723397612571716, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1152, "train_speed(iter/s)": 0.029716 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.0, "completions/mean_length": 77.31250190734863, "completions/min_length": 42.75, "epoch": 1.7177959791511541, "grad_norm": 0.6862519246959383, "kl": 0.40478515625, "learning_rate": 9.355906511612489e-07, "loss": -0.006157211028039455, "memory(GiB)": 112.53, "reward": 1.6562500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500074505806, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1153, "train_speed(iter/s)": 0.029718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 112.25, "completions/mean_length": 70.43750381469727, "completions/min_length": 43.75, "epoch": 1.7192851824274014, "grad_norm": 2.4910831786275125, "kl": 0.4521484375, "learning_rate": 9.354744927702607e-07, "loss": -0.001205306500196457, "memory(GiB)": 112.53, "reward": 1.6250000596046448, "reward_std": 0.12169522047042847, "rewards/CineAccuracyORM/mean": 0.6250000223517418, "rewards/CineAccuracyORM/std": 0.4120442271232605, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1154, "train_speed(iter/s)": 0.029713 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 116.5, "completions/mean_length": 75.85416984558105, "completions/min_length": 46.75, "epoch": 1.7207743857036486, "grad_norm": 0.9425067185255681, "kl": 0.4052734375, "learning_rate": 9.353582369555509e-07, "loss": 0.0026440308429300785, "memory(GiB)": 112.53, "reward": 1.6562500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500149011612, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1155, "train_speed(iter/s)": 0.029715 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 108.75, "completions/mean_length": 74.43750381469727, "completions/min_length": 43.5, "epoch": 1.7222635889798958, "grad_norm": 1.086630555384889, "kl": 0.408203125, "learning_rate": 9.352418837431276e-07, "loss": 0.004771728999912739, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.31460215896368027, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1156, "train_speed(iter/s)": 0.029711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.5, "completions/mean_length": 80.29166793823242, "completions/min_length": 48.5, "epoch": 1.723752792256143, "grad_norm": 1.268522045395633, "kl": 0.40576171875, "learning_rate": 9.351254331590215e-07, "loss": -0.003887362778186798, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.09750141017138958, "rewards/CineAccuracyORM/mean": 0.770833358168602, "rewards/CineAccuracyORM/std": 0.35253867506980896, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1157, "train_speed(iter/s)": 0.029704 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.75, "completions/mean_length": 78.77083396911621, "completions/min_length": 49.25, "epoch": 1.7252419955323903, "grad_norm": 1.2149789667459225, "kl": 0.39501953125, "learning_rate": 9.350088852292847e-07, "loss": 0.0033736671321094036, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.07715167850255966, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.372190922498703, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1158, "train_speed(iter/s)": 0.029714 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.25, "completions/mean_length": 74.46875381469727, "completions/min_length": 49.0, "epoch": 1.7267311988086373, "grad_norm": 1.6179719190223996, "kl": 0.4248046875, "learning_rate": 9.34892239979991e-07, "loss": 0.006897700484842062, "memory(GiB)": 112.53, "reward": 1.5104166865348816, "reward_std": 0.11258216388523579, "rewards/CineAccuracyORM/mean": 0.510416679084301, "rewards/CineAccuracyORM/std": 0.4976107105612755, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1159, "train_speed(iter/s)": 0.029716 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.5, "completions/mean_length": 76.94791984558105, "completions/min_length": 48.25, "epoch": 1.7282204020848846, "grad_norm": 1.1229689243379002, "kl": 0.41455078125, "learning_rate": 9.347754974372364e-07, "loss": 0.0021686144173145294, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.08311937749385834, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1160, "train_speed(iter/s)": 0.029705 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.75, "completions/mean_length": 74.53125190734863, "completions/min_length": 49.25, "epoch": 1.7297096053611318, "grad_norm": 0.7342673860898924, "kl": 0.43994140625, "learning_rate": 9.34658657627138e-07, "loss": 0.0063349902629852295, "memory(GiB)": 112.53, "reward": 1.697916716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.697916679084301, "rewards/CineAccuracyORM/std": 0.34448449313640594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1161, "train_speed(iter/s)": 0.029708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 116.0, "completions/mean_length": 73.57291984558105, "completions/min_length": 42.25, "epoch": 1.7311988086373788, "grad_norm": 0.9561109471382357, "kl": 0.41015625, "learning_rate": 9.345417205758353e-07, "loss": -0.012435040436685085, "memory(GiB)": 112.53, "reward": 1.8125000298023224, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.2950507290661335, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1162, "train_speed(iter/s)": 0.029717 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.5, "completions/mean_length": 78.97916984558105, "completions/min_length": 49.0, "epoch": 1.732688011913626, "grad_norm": 1.0982570854935627, "kl": 0.40380859375, "learning_rate": 9.344246863094891e-07, "loss": 0.010741928592324257, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.5000000102445483, "rewards/CineAccuracyORM/std": 0.2874932996928692, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1163, "train_speed(iter/s)": 0.029717 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.0, "completions/mean_length": 76.46875190734863, "completions/min_length": 49.75, "epoch": 1.7341772151898733, "grad_norm": 1.6746211039977668, "kl": 0.41796875, "learning_rate": 9.343075548542823e-07, "loss": 0.019143415614962578, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.1588566154241562, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.3362043872475624, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1164, "train_speed(iter/s)": 0.029712 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 105.25, "completions/mean_length": 72.32291984558105, "completions/min_length": 42.75, "epoch": 1.7356664184661206, "grad_norm": 0.0053907298343579685, "kl": 0.427734375, "learning_rate": 9.341903262364193e-07, "loss": 0.00042801472591236234, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1165, "train_speed(iter/s)": 0.029722 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.5, "completions/mean_length": 76.54166984558105, "completions/min_length": 44.5, "epoch": 1.7371556217423678, "grad_norm": 0.6040765182587097, "kl": 0.41748046875, "learning_rate": 9.340730004821265e-07, "loss": -0.007379948627203703, "memory(GiB)": 112.53, "reward": 1.7395834028720856, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.739583358168602, "rewards/CineAccuracyORM/std": 0.4121886007487774, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1166, "train_speed(iter/s)": 0.029717 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.0, "completions/mean_length": 74.50000190734863, "completions/min_length": 41.75, "epoch": 1.738644825018615, "grad_norm": 0.8002667424953764, "kl": 0.4345703125, "learning_rate": 9.339555776176514e-07, "loss": -0.0008405502885580063, "memory(GiB)": 112.53, "reward": 1.635416716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.635416679084301, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1167, "train_speed(iter/s)": 0.029727 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.0, "completions/mean_length": 80.65625190734863, "completions/min_length": 48.75, "epoch": 1.7401340282948623, "grad_norm": 1.2625281905495533, "kl": 0.40380859375, "learning_rate": 9.338380576692642e-07, "loss": 0.002134147798642516, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.07400632463395596, "rewards/CineAccuracyORM/mean": 0.802083358168602, "rewards/CineAccuracyORM/std": 0.33163563907146454, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1168, "train_speed(iter/s)": 0.02973 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.5, "completions/mean_length": 77.69791984558105, "completions/min_length": 46.75, "epoch": 1.7416232315711095, "grad_norm": 1.067227804158998, "kl": 0.39404296875, "learning_rate": 9.337204406632558e-07, "loss": 0.004719126503914595, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.3676535338163376, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1169, "train_speed(iter/s)": 0.029739 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.75, "completions/mean_length": 75.98958587646484, "completions/min_length": 49.5, "epoch": 1.7431124348473568, "grad_norm": 1.3979554680428294, "kl": 0.40283203125, "learning_rate": 9.336027266259398e-07, "loss": 0.0075975507497787476, "memory(GiB)": 112.53, "reward": 1.6250000298023224, "reward_std": 0.09750141017138958, "rewards/CineAccuracyORM/mean": 0.6250000074505806, "rewards/CineAccuracyORM/std": 0.44886354357004166, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1170, "train_speed(iter/s)": 0.029732 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.0, "completions/mean_length": 74.70833587646484, "completions/min_length": 46.0, "epoch": 1.744601638123604, "grad_norm": 0.005285499076116314, "kl": 0.40966796875, "learning_rate": 9.334849155836507e-07, "loss": 0.0004087777924723923, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1171, "train_speed(iter/s)": 0.029735 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.5, "completions/mean_length": 78.03125190734863, "completions/min_length": 47.75, "epoch": 1.746090841399851, "grad_norm": 1.7735185970663416, "kl": 0.42578125, "learning_rate": 9.333670075627449e-07, "loss": 0.03330165147781372, "memory(GiB)": 112.53, "reward": 1.5520833730697632, "reward_std": 0.1602645143866539, "rewards/CineAccuracyORM/mean": 0.5520833432674408, "rewards/CineAccuracyORM/std": 0.35820145905017853, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1172, "train_speed(iter/s)": 0.029735 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.5, "completions/mean_length": 78.67708778381348, "completions/min_length": 51.25, "epoch": 1.7475800446760983, "grad_norm": 1.2629424023729379, "kl": 0.42724609375, "learning_rate": 9.332490025896009e-07, "loss": 0.0015192334540188313, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.08908708393573761, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1173, "train_speed(iter/s)": 0.029723 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.25, "completions/mean_length": 73.52083778381348, "completions/min_length": 45.75, "epoch": 1.7490692479523455, "grad_norm": 1.4547547200668325, "kl": 0.40625, "learning_rate": 9.331309006906185e-07, "loss": -0.01104690507054329, "memory(GiB)": 112.53, "reward": 1.635416716337204, "reward_std": 0.102054663002491, "rewards/CineAccuracyORM/mean": 0.635416679084301, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1174, "train_speed(iter/s)": 0.029732 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.5, "completions/mean_length": 74.35416793823242, "completions/min_length": 46.25, "epoch": 1.7505584512285925, "grad_norm": 0.6986618143100687, "kl": 0.42578125, "learning_rate": 9.330127018922193e-07, "loss": -0.0030106627382338047, "memory(GiB)": 112.53, "reward": 1.8958333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1175, "train_speed(iter/s)": 0.029737 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.25, "completions/mean_length": 78.625, "completions/min_length": 48.0, "epoch": 1.7520476545048398, "grad_norm": 1.6843463635446678, "kl": 0.39453125, "learning_rate": 9.328944062208466e-07, "loss": 0.0076832580380141735, "memory(GiB)": 112.53, "reward": 1.6145833730697632, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.6145833488553762, "rewards/CineAccuracyORM/std": 0.30704472959041595, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1176, "train_speed(iter/s)": 0.029735 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 114.0, "completions/mean_length": 73.57291793823242, "completions/min_length": 46.5, "epoch": 1.753536857781087, "grad_norm": 0.005886952671920353, "kl": 0.4130859375, "learning_rate": 9.327760137029651e-07, "loss": 0.00041339831659570336, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1177, "train_speed(iter/s)": 0.029737 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.75, "completions/mean_length": 73.88541984558105, "completions/min_length": 40.75, "epoch": 1.7550260610573343, "grad_norm": 0.7401692958463127, "kl": 0.4228515625, "learning_rate": 9.326575243650616e-07, "loss": 0.00019774449174292386, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1178, "train_speed(iter/s)": 0.029746 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.25, "completions/mean_length": 74.25000381469727, "completions/min_length": 46.25, "epoch": 1.7565152643335815, "grad_norm": 0.00528794540514011, "kl": 0.4091796875, "learning_rate": 9.325389382336446e-07, "loss": 0.0004086688277311623, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1179, "train_speed(iter/s)": 0.029745 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.25, "completions/mean_length": 79.02083396911621, "completions/min_length": 45.25, "epoch": 1.7580044676098288, "grad_norm": 1.0520078798223782, "kl": 0.3876953125, "learning_rate": 9.324202553352436e-07, "loss": -0.0033241375349462032, "memory(GiB)": 112.53, "reward": 1.4687500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.4687500074505806, "rewards/CineAccuracyORM/std": 0.4456155598163605, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1180, "train_speed(iter/s)": 0.029749 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 116.25, "completions/mean_length": 76.72916984558105, "completions/min_length": 46.5, "epoch": 1.759493670886076, "grad_norm": 0.005465016753697009, "kl": 0.42333984375, "learning_rate": 9.323014756964103e-07, "loss": 0.00042342659435234964, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1181, "train_speed(iter/s)": 0.029745 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.75, "completions/mean_length": 72.83333778381348, "completions/min_length": 42.0, "epoch": 1.7609828741623232, "grad_norm": 1.2935403616586378, "kl": 0.41455078125, "learning_rate": 9.32182599343718e-07, "loss": -0.0044592092745006084, "memory(GiB)": 112.53, "reward": 1.7708333432674408, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.7708333358168602, "rewards/CineAccuracyORM/std": 0.2508781775832176, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1182, "train_speed(iter/s)": 0.029748 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.5, "completions/mean_length": 79.46875381469727, "completions/min_length": 44.0, "epoch": 1.7624720774385705, "grad_norm": 0.004986628126174358, "kl": 0.40771484375, "learning_rate": 9.320636263037614e-07, "loss": 0.0004079401260241866, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1183, "train_speed(iter/s)": 0.029735 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.75, "completions/mean_length": 76.85417175292969, "completions/min_length": 48.0, "epoch": 1.7639612807148177, "grad_norm": 1.4025471541600754, "kl": 0.42138671875, "learning_rate": 9.31944556603157e-07, "loss": 0.02891266904771328, "memory(GiB)": 112.53, "reward": 1.791666716337204, "reward_std": 0.12169522047042847, "rewards/CineAccuracyORM/mean": 0.7916666865348816, "rewards/CineAccuracyORM/std": 0.3967231586575508, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1184, "train_speed(iter/s)": 0.029737 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.0, "completions/mean_length": 76.42708587646484, "completions/min_length": 49.0, "epoch": 1.7654504839910647, "grad_norm": 0.9064036414314984, "kl": 0.40576171875, "learning_rate": 9.318253902685431e-07, "loss": -0.007412451319396496, "memory(GiB)": 112.53, "reward": 1.7187500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500149011612, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1185, "train_speed(iter/s)": 0.029734 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.0, "completions/mean_length": 78.16666984558105, "completions/min_length": 47.5, "epoch": 1.766939687267312, "grad_norm": 1.417558149138211, "kl": 0.396484375, "learning_rate": 9.317061273265792e-07, "loss": -0.013566315174102783, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.11258216947317123, "rewards/CineAccuracyORM/mean": 0.6770833395421505, "rewards/CineAccuracyORM/std": 0.30001722276210785, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1186, "train_speed(iter/s)": 0.029743 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.0, "completions/mean_length": 75.14583587646484, "completions/min_length": 42.0, "epoch": 1.7684288905435592, "grad_norm": 1.3143306423511336, "kl": 0.41748046875, "learning_rate": 9.315867678039468e-07, "loss": 0.002338235266506672, "memory(GiB)": 112.53, "reward": 1.541666716337204, "reward_std": 0.07715167850255966, "rewards/CineAccuracyORM/mean": 0.5416666772216558, "rewards/CineAccuracyORM/std": 0.316870853304863, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1187, "train_speed(iter/s)": 0.029745 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.25, "completions/mean_length": 76.73958587646484, "completions/min_length": 47.0, "epoch": 1.7699180938198062, "grad_norm": 0.8751057678203343, "kl": 0.4169921875, "learning_rate": 9.314673117273486e-07, "loss": 0.00724344328045845, "memory(GiB)": 112.53, "reward": 1.3645833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.3645833432674408, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1188, "train_speed(iter/s)": 0.029741 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.25, "completions/mean_length": 79.04166984558105, "completions/min_length": 47.0, "epoch": 1.7714072970960535, "grad_norm": 1.4842302116699657, "kl": 0.396484375, "learning_rate": 9.313477591235094e-07, "loss": -0.004576362203806639, "memory(GiB)": 112.53, "reward": 1.7916666865348816, "reward_std": 0.0725918859243393, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.24807433784008026, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1189, "train_speed(iter/s)": 0.029743 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 108.5, "completions/mean_length": 72.38541984558105, "completions/min_length": 41.75, "epoch": 1.7728965003723007, "grad_norm": 2.0190834425392867, "kl": 0.4052734375, "learning_rate": 9.312281100191751e-07, "loss": 0.004943002015352249, "memory(GiB)": 112.53, "reward": 1.4583333730697632, "reward_std": 0.1497435588389635, "rewards/CineAccuracyORM/mean": 0.45833335630595684, "rewards/CineAccuracyORM/std": 0.4231211394071579, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1190, "train_speed(iter/s)": 0.029745 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.5, "completions/mean_length": 82.31250190734863, "completions/min_length": 52.75, "epoch": 1.774385703648548, "grad_norm": 1.2705752143349116, "kl": 0.38134765625, "learning_rate": 9.311083644411137e-07, "loss": 0.008852319791913033, "memory(GiB)": 112.53, "reward": 1.6250000596046448, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.6250000223517418, "rewards/CineAccuracyORM/std": 0.4190470837056637, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1191, "train_speed(iter/s)": 0.029727 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 105.75, "completions/mean_length": 71.61458587646484, "completions/min_length": 48.25, "epoch": 1.7758749069247952, "grad_norm": 0.00601781073789185, "kl": 0.43359375, "learning_rate": 9.309885224161143e-07, "loss": 0.0004334768163971603, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1192, "train_speed(iter/s)": 0.02973 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.0, "completions/mean_length": 78.28125, "completions/min_length": 49.5, "epoch": 1.7773641102010425, "grad_norm": 1.40316594252326, "kl": 0.39501953125, "learning_rate": 9.308685839709878e-07, "loss": 0.007005985360592604, "memory(GiB)": 112.53, "reward": 1.729166716337204, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.7291666865348816, "rewards/CineAccuracyORM/std": 0.3637066036462784, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1193, "train_speed(iter/s)": 0.029727 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 112.25, "completions/mean_length": 72.94791793823242, "completions/min_length": 46.75, "epoch": 1.7788533134772897, "grad_norm": 1.2958010881356496, "kl": 0.41845703125, "learning_rate": 9.307485491325668e-07, "loss": 0.01879667490720749, "memory(GiB)": 112.53, "reward": 1.7916666865348816, "reward_std": 0.07715167850255966, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.24807433784008026, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1194, "train_speed(iter/s)": 0.029722 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.25, "completions/mean_length": 75.79166793823242, "completions/min_length": 49.5, "epoch": 1.780342516753537, "grad_norm": 1.4532636610267187, "kl": 0.3994140625, "learning_rate": 9.306284179277051e-07, "loss": -0.0010759744327515364, "memory(GiB)": 112.53, "reward": 1.8750000298023224, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.8750000149011612, "rewards/CineAccuracyORM/std": 0.2558748833835125, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1195, "train_speed(iter/s)": 0.029718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.25, "completions/mean_length": 76.67708396911621, "completions/min_length": 43.0, "epoch": 1.7818317200297842, "grad_norm": 2.075495496080171, "kl": 0.41552734375, "learning_rate": 9.305081903832784e-07, "loss": 0.011064456775784492, "memory(GiB)": 112.53, "reward": 1.604166716337204, "reward_std": 0.11713541857898235, "rewards/CineAccuracyORM/mean": 0.6041666865348816, "rewards/CineAccuracyORM/std": 0.48048195987939835, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1196, "train_speed(iter/s)": 0.029715 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.5, "completions/mean_length": 73.72916793823242, "completions/min_length": 46.75, "epoch": 1.7833209233060314, "grad_norm": 0.005532249682154284, "kl": 0.40234375, "learning_rate": 9.303878665261839e-07, "loss": 0.0004019789048470557, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1197, "train_speed(iter/s)": 0.029711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 114.25, "completions/mean_length": 70.0, "completions/min_length": 46.75, "epoch": 1.7848101265822784, "grad_norm": 0.005532237482802123, "kl": 0.4033203125, "learning_rate": 9.302674463833401e-07, "loss": 0.0004031313583254814, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1198, "train_speed(iter/s)": 0.029708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 114.25, "completions/mean_length": 70.51041984558105, "completions/min_length": 47.5, "epoch": 1.7862993298585257, "grad_norm": 0.9429280115155624, "kl": 0.43408203125, "learning_rate": 9.301469299816873e-07, "loss": -0.0030318065546453, "memory(GiB)": 112.53, "reward": 1.7083333730697632, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7083333432674408, "rewards/CineAccuracyORM/std": 0.36846019327640533, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1199, "train_speed(iter/s)": 0.029706 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.25, "completions/mean_length": 76.14583587646484, "completions/min_length": 45.25, "epoch": 1.787788533134773, "grad_norm": 1.2513219809410836, "kl": 0.4130859375, "learning_rate": 9.300263173481875e-07, "loss": -0.006626126356422901, "memory(GiB)": 112.53, "reward": 1.6562500596046448, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.6562500149011612, "rewards/CineAccuracyORM/std": 0.3735327273607254, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1200, "train_speed(iter/s)": 0.029708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.25, "completions/mean_length": 75.58333396911621, "completions/min_length": 46.0, "epoch": 1.78927773641102, "grad_norm": 2.1440447043209145, "kl": 0.41748046875, "learning_rate": 9.299056085098236e-07, "loss": -0.006990428548306227, "memory(GiB)": 112.53, "reward": 1.6562500298023224, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.6562500074505806, "rewards/CineAccuracyORM/std": 0.4358111694455147, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1201, "train_speed(iter/s)": 0.029711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.25, "completions/mean_length": 75.85416793823242, "completions/min_length": 48.25, "epoch": 1.7907669396872672, "grad_norm": 0.7822444918610664, "kl": 0.41943359375, "learning_rate": 9.297848034936005e-07, "loss": 0.004687697626650333, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833507180214, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1202, "train_speed(iter/s)": 0.029713 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.0, "completions/mean_length": 74.63541984558105, "completions/min_length": 47.75, "epoch": 1.7922561429635144, "grad_norm": 1.6568081955540634, "kl": 0.42138671875, "learning_rate": 9.296639023265446e-07, "loss": -0.007541111670434475, "memory(GiB)": 112.53, "reward": 1.8541666865348816, "reward_std": 0.11116772517561913, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.2893018424510956, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1203, "train_speed(iter/s)": 0.029703 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.75, "completions/mean_length": 76.46875190734863, "completions/min_length": 47.25, "epoch": 1.7937453462397617, "grad_norm": 0.005444411417606627, "kl": 0.42138671875, "learning_rate": 9.29542905035704e-07, "loss": 0.00042141202720813453, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1204, "train_speed(iter/s)": 0.029699 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 114.75, "completions/mean_length": 74.07291793823242, "completions/min_length": 43.75, "epoch": 1.795234549516009, "grad_norm": 0.8954520273200938, "kl": 0.45263671875, "learning_rate": 9.294218116481475e-07, "loss": -0.009841294959187508, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1205, "train_speed(iter/s)": 0.029689 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.0, "completions/mean_length": 73.90625190734863, "completions/min_length": 42.0, "epoch": 1.7967237527922562, "grad_norm": 1.0767489427970645, "kl": 0.39501953125, "learning_rate": 9.293006221909663e-07, "loss": 0.0010590918827801943, "memory(GiB)": 112.53, "reward": 1.5520833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.5520833432674408, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1206, "train_speed(iter/s)": 0.029683 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.5, "completions/mean_length": 76.73958587646484, "completions/min_length": 48.25, "epoch": 1.7982129560685034, "grad_norm": 0.005008183190894808, "kl": 0.4140625, "learning_rate": 9.291793366912726e-07, "loss": 0.0004137089417781681, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1207, "train_speed(iter/s)": 0.029679 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.75, "completions/mean_length": 76.78125381469727, "completions/min_length": 49.25, "epoch": 1.7997021593447506, "grad_norm": 1.5605547122158177, "kl": 0.41455078125, "learning_rate": 9.290579551762001e-07, "loss": 0.001553232199512422, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.0876726396381855, "rewards/CineAccuracyORM/mean": 0.7395833507180214, "rewards/CineAccuracyORM/std": 0.34280356764793396, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1208, "train_speed(iter/s)": 0.02968 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.5, "completions/mean_length": 79.72916984558105, "completions/min_length": 47.25, "epoch": 1.8011913626209979, "grad_norm": 1.7266281168906505, "kl": 0.3994140625, "learning_rate": 9.289364776729043e-07, "loss": -0.0032147790770977736, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1209, "train_speed(iter/s)": 0.029677 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.25, "completions/mean_length": 78.69791984558105, "completions/min_length": 44.0, "epoch": 1.8026805658972451, "grad_norm": 1.3330501502191814, "kl": 0.39404296875, "learning_rate": 9.28814904208562e-07, "loss": 0.001173533033579588, "memory(GiB)": 112.53, "reward": 1.7604166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1210, "train_speed(iter/s)": 0.02968 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.5, "completions/mean_length": 74.85416793823242, "completions/min_length": 47.5, "epoch": 1.8041697691734921, "grad_norm": 0.005423900214387384, "kl": 0.4150390625, "learning_rate": 9.286932348103715e-07, "loss": 0.0004144227714277804, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1211, "train_speed(iter/s)": 0.029682 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.75, "completions/mean_length": 84.52083587646484, "completions/min_length": 49.5, "epoch": 1.8056589724497394, "grad_norm": 0.7246613158843181, "kl": 0.3515625, "learning_rate": 9.285714695055521e-07, "loss": -0.0015521942405030131, "memory(GiB)": 112.53, "reward": 1.4687500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.4687500149011612, "rewards/CineAccuracyORM/std": 0.464870348572731, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1212, "train_speed(iter/s)": 0.02969 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.0, "completions/mean_length": 83.82291793823242, "completions/min_length": 51.25, "epoch": 1.8071481757259866, "grad_norm": 2.010187281357393, "kl": 0.369140625, "learning_rate": 9.284496083213454e-07, "loss": 0.01623915508389473, "memory(GiB)": 112.53, "reward": 1.6145834028720856, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.614583358168602, "rewards/CineAccuracyORM/std": 0.48275065422058105, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1213, "train_speed(iter/s)": 0.029698 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.75, "completions/mean_length": 74.54166793823242, "completions/min_length": 39.25, "epoch": 1.8086373790022336, "grad_norm": 0.004935240307642081, "kl": 0.408203125, "learning_rate": 9.283276512850136e-07, "loss": 0.00040724617429077625, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1214, "train_speed(iter/s)": 0.029689 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.25, "completions/mean_length": 75.54166984558105, "completions/min_length": 43.0, "epoch": 1.810126582278481, "grad_norm": 1.953164905425971, "kl": 0.4072265625, "learning_rate": 9.28205598423841e-07, "loss": 0.009131303988397121, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.12169522047042847, "rewards/CineAccuracyORM/mean": 0.833333358168602, "rewards/CineAccuracyORM/std": 0.2916583716869354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1215, "train_speed(iter/s)": 0.029674 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.0, "completions/mean_length": 77.08333396911621, "completions/min_length": 44.75, "epoch": 1.8116157855547281, "grad_norm": 1.1678413503682723, "kl": 0.39013671875, "learning_rate": 9.280834497651332e-07, "loss": -0.00782875344157219, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1216, "train_speed(iter/s)": 0.02967 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.0, "completions/mean_length": 79.96875190734863, "completions/min_length": 42.75, "epoch": 1.8131049888309754, "grad_norm": 1.2057083508122053, "kl": 0.388671875, "learning_rate": 9.27961205336217e-07, "loss": 0.00534193217754364, "memory(GiB)": 112.53, "reward": 1.7187500596046448, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.7187500149011612, "rewards/CineAccuracyORM/std": 0.35000117123126984, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1217, "train_speed(iter/s)": 0.029665 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.5, "completions/mean_length": 75.68750190734863, "completions/min_length": 44.25, "epoch": 1.8145941921072226, "grad_norm": 1.1495700414893142, "kl": 0.384765625, "learning_rate": 9.278388651644405e-07, "loss": -0.008774980902671814, "memory(GiB)": 112.53, "reward": 1.6250000298023224, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.6250000074505806, "rewards/CineAccuracyORM/std": 0.3477324768900871, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1218, "train_speed(iter/s)": 0.029674 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 114.25, "completions/mean_length": 76.31250190734863, "completions/min_length": 48.25, "epoch": 1.8160833953834699, "grad_norm": 1.6084432824008537, "kl": 0.40673828125, "learning_rate": 9.277164292771739e-07, "loss": 0.003106688614934683, "memory(GiB)": 112.53, "reward": 1.6458333730697632, "reward_std": 0.09750140458345413, "rewards/CineAccuracyORM/mean": 0.645833358168602, "rewards/CineAccuracyORM/std": 0.39807476475834846, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1219, "train_speed(iter/s)": 0.029677 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.25, "completions/mean_length": 78.89583587646484, "completions/min_length": 47.5, "epoch": 1.817572598659717, "grad_norm": 0.9995879885937128, "kl": 0.38037109375, "learning_rate": 9.275938977018081e-07, "loss": -0.0024335819762200117, "memory(GiB)": 112.53, "reward": 1.7812500298023224, "reward_std": 0.07400632463395596, "rewards/CineAccuracyORM/mean": 0.7812500223517418, "rewards/CineAccuracyORM/std": 0.26659026369452477, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1220, "train_speed(iter/s)": 0.029686 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.75, "completions/mean_length": 78.06250190734863, "completions/min_length": 42.75, "epoch": 1.8190618019359643, "grad_norm": 1.1100167208794103, "kl": 0.37255859375, "learning_rate": 9.274712704657557e-07, "loss": 0.003108441364020109, "memory(GiB)": 112.53, "reward": 1.6041666865348816, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.604166679084301, "rewards/CineAccuracyORM/std": 0.3600961044430733, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1221, "train_speed(iter/s)": 0.029679 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.25, "completions/mean_length": 79.53125190734863, "completions/min_length": 46.25, "epoch": 1.8205510052122116, "grad_norm": 1.0602965748683533, "kl": 0.38330078125, "learning_rate": 9.273485475964509e-07, "loss": 0.0004783710464835167, "memory(GiB)": 112.53, "reward": 1.7083333432674408, "reward_std": 0.07715167850255966, "rewards/CineAccuracyORM/mean": 0.7083333432674408, "rewards/CineAccuracyORM/std": 0.23648399859666824, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1222, "train_speed(iter/s)": 0.029674 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.25, "completions/mean_length": 76.85416793823242, "completions/min_length": 44.75, "epoch": 1.8220402084884588, "grad_norm": 0.9386579528933152, "kl": 0.39990234375, "learning_rate": 9.272257291213487e-07, "loss": 0.0017850397853180766, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833507180214, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1223, "train_speed(iter/s)": 0.029676 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.0, "completions/mean_length": 79.95833587646484, "completions/min_length": 47.5, "epoch": 1.8235294117647058, "grad_norm": 1.555003488554093, "kl": 0.3720703125, "learning_rate": 9.271028150679264e-07, "loss": 0.026682814583182335, "memory(GiB)": 112.53, "reward": 1.6770833432674408, "reward_std": 0.08838834799826145, "rewards/CineAccuracyORM/mean": 0.6770833432674408, "rewards/CineAccuracyORM/std": 0.2982987128198147, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1224, "train_speed(iter/s)": 0.029674 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.25, "completions/mean_length": 77.55208587646484, "completions/min_length": 43.75, "epoch": 1.825018615040953, "grad_norm": 2.6003923462284457, "kl": 0.365234375, "learning_rate": 9.269798054636815e-07, "loss": -0.01052296906709671, "memory(GiB)": 112.53, "reward": 1.697916716337204, "reward_std": 0.18831286393105984, "rewards/CineAccuracyORM/mean": 0.6979166939854622, "rewards/CineAccuracyORM/std": 0.4006930850446224, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1225, "train_speed(iter/s)": 0.029681 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.25, "completions/mean_length": 78.27083587646484, "completions/min_length": 47.25, "epoch": 1.8265078183172003, "grad_norm": 0.9765087690977337, "kl": 0.392578125, "learning_rate": 9.26856700336134e-07, "loss": 0.008566546253859997, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.7500000298023224, "rewards/CineAccuracyORM/std": 0.40787915512919426, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1226, "train_speed(iter/s)": 0.029683 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.5, "completions/mean_length": 77.70833396911621, "completions/min_length": 46.0, "epoch": 1.8279970215934473, "grad_norm": 0.005034347019762658, "kl": 0.3798828125, "learning_rate": 9.267334997128247e-07, "loss": 0.0003793510841205716, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1227, "train_speed(iter/s)": 0.029692 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 113.25, "completions/mean_length": 74.92708587646484, "completions/min_length": 45.0, "epoch": 1.8294862248696946, "grad_norm": 0.9970562208894911, "kl": 0.4189453125, "learning_rate": 9.266102036213159e-07, "loss": -0.008134977892041206, "memory(GiB)": 112.53, "reward": 1.6979167461395264, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6979166865348816, "rewards/CineAccuracyORM/std": 0.464870348572731, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1228, "train_speed(iter/s)": 0.029695 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.0, "completions/mean_length": 80.00000381469727, "completions/min_length": 48.0, "epoch": 1.8309754281459418, "grad_norm": 2.0525859531207726, "kl": 0.3994140625, "learning_rate": 9.264868120891911e-07, "loss": 0.012341506779193878, "memory(GiB)": 112.53, "reward": 1.6458333730697632, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.6458333507180214, "rewards/CineAccuracyORM/std": 0.3637066036462784, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1229, "train_speed(iter/s)": 0.029684 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 114.5, "completions/mean_length": 77.26041984558105, "completions/min_length": 49.75, "epoch": 1.832464631422189, "grad_norm": 0.005138237818637022, "kl": 0.3974609375, "learning_rate": 9.263633251440555e-07, "loss": 0.00039761161315254867, "memory(GiB)": 112.53, "reward": 1.5833333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1230, "train_speed(iter/s)": 0.029674 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 103.25, "completions/mean_length": 75.07291793823242, "completions/min_length": 51.25, "epoch": 1.8339538346984363, "grad_norm": 1.5727130453613183, "kl": 0.37646484375, "learning_rate": 9.262397428135351e-07, "loss": -0.007220863364636898, "memory(GiB)": 112.53, "reward": 1.6458333432674408, "reward_std": 0.11713541857898235, "rewards/CineAccuracyORM/mean": 0.6458333358168602, "rewards/CineAccuracyORM/std": 0.3676535338163376, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1231, "train_speed(iter/s)": 0.029683 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.75, "completions/mean_length": 74.47916793823242, "completions/min_length": 48.0, "epoch": 1.8354430379746836, "grad_norm": 1.1359082608196498, "kl": 0.39208984375, "learning_rate": 9.261160651252777e-07, "loss": 0.006420396268367767, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.31460215896368027, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1232, "train_speed(iter/s)": 0.029685 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 116.5, "completions/mean_length": 74.79166984558105, "completions/min_length": 44.25, "epoch": 1.8369322412509308, "grad_norm": 0.004806339055377591, "kl": 0.40673828125, "learning_rate": 9.259922921069525e-07, "loss": 0.00040634721517562866, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1233, "train_speed(iter/s)": 0.02968 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.75, "completions/mean_length": 77.01041984558105, "completions/min_length": 45.75, "epoch": 1.838421444527178, "grad_norm": 4.257480827009973, "kl": 0.40380859375, "learning_rate": 9.258684237862496e-07, "loss": -0.0007519145729020238, "memory(GiB)": 112.53, "reward": 1.7916666865348816, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.791666679084301, "rewards/CineAccuracyORM/std": 0.2433207482099533, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1234, "train_speed(iter/s)": 0.029678 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.5, "completions/mean_length": 77.65625381469727, "completions/min_length": 41.5, "epoch": 1.8399106478034253, "grad_norm": 0.006233077336629374, "kl": 0.373046875, "learning_rate": 9.257444601908805e-07, "loss": 0.0003735218197107315, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1235, "train_speed(iter/s)": 0.029686 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.5, "completions/mean_length": 76.19791793823242, "completions/min_length": 47.0, "epoch": 1.8413998510796725, "grad_norm": 0.9788377762204791, "kl": 0.38818359375, "learning_rate": 9.256204013485785e-07, "loss": 0.006096796598285437, "memory(GiB)": 112.53, "reward": 1.5937500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500102445483, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1236, "train_speed(iter/s)": 0.029695 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.5, "completions/mean_length": 75.18750190734863, "completions/min_length": 47.0, "epoch": 1.8428890543559195, "grad_norm": 1.6827442033190423, "kl": 0.40771484375, "learning_rate": 9.254962472870975e-07, "loss": 0.00934627652168274, "memory(GiB)": 112.53, "reward": 1.7916666865348816, "reward_std": 0.12169522047042847, "rewards/CineAccuracyORM/mean": 0.791666679084301, "rewards/CineAccuracyORM/std": 0.2155592292547226, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1237, "train_speed(iter/s)": 0.029697 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 116.25, "completions/mean_length": 76.14583587646484, "completions/min_length": 51.0, "epoch": 1.8443782576321668, "grad_norm": 1.1458530879559625, "kl": 0.37841796875, "learning_rate": 9.253719980342134e-07, "loss": -0.0006358014652505517, "memory(GiB)": 112.53, "reward": 1.7708333432674408, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333358168602, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1238, "train_speed(iter/s)": 0.029706 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.75, "completions/mean_length": 73.73958778381348, "completions/min_length": 42.25, "epoch": 1.845867460908414, "grad_norm": 1.986496706522463, "kl": 0.40771484375, "learning_rate": 9.252476536177229e-07, "loss": -0.006033960729837418, "memory(GiB)": 112.53, "reward": 1.6354166865348816, "reward_std": 0.07400632463395596, "rewards/CineAccuracyORM/mean": 0.635416679084301, "rewards/CineAccuracyORM/std": 0.3641507476568222, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1239, "train_speed(iter/s)": 0.029704 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.5, "completions/mean_length": 82.45833587646484, "completions/min_length": 43.0, "epoch": 1.847356664184661, "grad_norm": 0.005417595908160805, "kl": 0.38818359375, "learning_rate": 9.251232140654442e-07, "loss": 0.0003881038574036211, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1240, "train_speed(iter/s)": 0.029686 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.0, "completions/mean_length": 76.79166984558105, "completions/min_length": 44.0, "epoch": 1.8488458674609083, "grad_norm": 1.1833184428566261, "kl": 0.373046875, "learning_rate": 9.249986794052167e-07, "loss": -0.006554383784532547, "memory(GiB)": 112.53, "reward": 1.5312500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.5312500149011612, "rewards/CineAccuracyORM/std": 0.4884019047021866, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1241, "train_speed(iter/s)": 0.029688 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.75, "completions/mean_length": 77.84375190734863, "completions/min_length": 39.5, "epoch": 1.8503350707371555, "grad_norm": 2.00122090540703, "kl": 0.39111328125, "learning_rate": 9.248740496649011e-07, "loss": 0.01537394430488348, "memory(GiB)": 112.53, "reward": 1.4270834028720856, "reward_std": 0.17920634150505066, "rewards/CineAccuracyORM/mean": 0.4270833507180214, "rewards/CineAccuracyORM/std": 0.4759037047624588, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1242, "train_speed(iter/s)": 0.02969 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.75, "completions/mean_length": 76.83333587646484, "completions/min_length": 53.75, "epoch": 1.8518242740134028, "grad_norm": 0.6113409896466244, "kl": 0.39208984375, "learning_rate": 9.247493248723795e-07, "loss": -0.00931677594780922, "memory(GiB)": 112.53, "reward": 1.479166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.479166679084301, "rewards/CineAccuracyORM/std": 0.43174003064632416, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1243, "train_speed(iter/s)": 0.029684 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.75, "completions/mean_length": 78.32291793823242, "completions/min_length": 46.5, "epoch": 1.85331347728965, "grad_norm": 0.0055360262078800624, "kl": 0.40576171875, "learning_rate": 9.24624505055555e-07, "loss": 0.00040512619307264686, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1244, "train_speed(iter/s)": 0.029686 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.25, "completions/mean_length": 74.17708587646484, "completions/min_length": 48.75, "epoch": 1.8548026805658973, "grad_norm": 0.8891766022423843, "kl": 0.392578125, "learning_rate": 9.244995902423522e-07, "loss": -0.0015258787898346782, "memory(GiB)": 112.53, "reward": 1.6562500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500074505806, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1245, "train_speed(iter/s)": 0.029681 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.5, "completions/mean_length": 75.67708587646484, "completions/min_length": 45.75, "epoch": 1.8562918838421445, "grad_norm": 0.005057192832025351, "kl": 0.4013671875, "learning_rate": 9.243745804607168e-07, "loss": 0.0004011665005236864, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1246, "train_speed(iter/s)": 0.029682 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.5, "completions/mean_length": 73.79166984558105, "completions/min_length": 42.0, "epoch": 1.8577810871183917, "grad_norm": 1.4781782719826873, "kl": 0.41064453125, "learning_rate": 9.242494757386159e-07, "loss": 0.008966393768787384, "memory(GiB)": 112.53, "reward": 1.697916716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.697916679084301, "rewards/CineAccuracyORM/std": 0.34448449313640594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1247, "train_speed(iter/s)": 0.029685 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.0, "completions/mean_length": 75.55208587646484, "completions/min_length": 43.25, "epoch": 1.859270290394639, "grad_norm": 0.7565778381784833, "kl": 0.39697265625, "learning_rate": 9.241242761040375e-07, "loss": 0.0013384212506935, "memory(GiB)": 112.53, "reward": 1.729166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7291666865348816, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1248, "train_speed(iter/s)": 0.029687 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 116.0, "completions/mean_length": 72.20833587646484, "completions/min_length": 45.75, "epoch": 1.8607594936708862, "grad_norm": 1.0081778350889232, "kl": 0.41943359375, "learning_rate": 9.239989815849915e-07, "loss": -0.009303398430347443, "memory(GiB)": 112.53, "reward": 1.6458333730697632, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.6458333507180214, "rewards/CineAccuracyORM/std": 0.4154365845024586, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1249, "train_speed(iter/s)": 0.029683 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.75, "completions/mean_length": 77.09375190734863, "completions/min_length": 47.75, "epoch": 1.8622486969471332, "grad_norm": 0.0061549554945705244, "kl": 0.40234375, "learning_rate": 9.238735922095082e-07, "loss": 0.0004026126116514206, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1250, "train_speed(iter/s)": 0.029691 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.75, "completions/mean_length": 77.38541793823242, "completions/min_length": 46.25, "epoch": 1.8637379002233805, "grad_norm": 0.00529310682645214, "kl": 0.39697265625, "learning_rate": 9.237481080056398e-07, "loss": 0.0003963689669035375, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1251, "train_speed(iter/s)": 0.0297 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.75, "completions/mean_length": 76.06250190734863, "completions/min_length": 45.0, "epoch": 1.8652271034996277, "grad_norm": 0.7522338072035979, "kl": 0.41357421875, "learning_rate": 9.236225290014595e-07, "loss": -0.007823885418474674, "memory(GiB)": 112.53, "reward": 1.5520833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.5520833432674408, "rewards/CineAccuracyORM/std": 0.4456155598163605, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1252, "train_speed(iter/s)": 0.029702 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.5, "completions/mean_length": 75.82291793823242, "completions/min_length": 45.25, "epoch": 1.8667163067758747, "grad_norm": 0.8746044851433504, "kl": 0.41064453125, "learning_rate": 9.234968552250612e-07, "loss": 0.016661077737808228, "memory(GiB)": 112.53, "reward": 1.8750000298023224, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.8750000149011612, "rewards/CineAccuracyORM/std": 0.2155592292547226, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1253, "train_speed(iter/s)": 0.029696 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.0, "completions/mean_length": 73.05208492279053, "completions/min_length": 44.75, "epoch": 1.868205510052122, "grad_norm": 0.005788920854836892, "kl": 0.41650390625, "learning_rate": 9.233710867045609e-07, "loss": 0.00041676219552755356, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1254, "train_speed(iter/s)": 0.029702 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.25, "completions/mean_length": 82.08333587646484, "completions/min_length": 50.25, "epoch": 1.8696947133283692, "grad_norm": 1.8026914197583357, "kl": 0.4091796875, "learning_rate": 9.232452234680952e-07, "loss": -0.012596232816576958, "memory(GiB)": 112.53, "reward": 1.8020833432674408, "reward_std": 0.12028077617287636, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.3209419921040535, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1255, "train_speed(iter/s)": 0.029691 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.75, "completions/mean_length": 72.97916984558105, "completions/min_length": 43.75, "epoch": 1.8711839166046165, "grad_norm": 1.5528341742209837, "kl": 0.4228515625, "learning_rate": 9.23119265543822e-07, "loss": 0.010798620991408825, "memory(GiB)": 112.53, "reward": 1.7187500298023224, "reward_std": 0.07400632463395596, "rewards/CineAccuracyORM/mean": 0.7187500149011612, "rewards/CineAccuracyORM/std": 0.33919306844472885, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1256, "train_speed(iter/s)": 0.029694 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.0, "completions/mean_length": 75.78125190734863, "completions/min_length": 45.5, "epoch": 1.8726731198808637, "grad_norm": 0.8613261769475798, "kl": 0.419921875, "learning_rate": 9.229932129599205e-07, "loss": 0.0073505281470716, "memory(GiB)": 112.53, "reward": 1.8437500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1257, "train_speed(iter/s)": 0.029696 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.75, "completions/mean_length": 74.76041793823242, "completions/min_length": 46.5, "epoch": 1.874162323157111, "grad_norm": 1.157810783384512, "kl": 0.40966796875, "learning_rate": 9.228670657445909e-07, "loss": 0.009393801912665367, "memory(GiB)": 112.53, "reward": 1.7916666865348816, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.24807433784008026, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1258, "train_speed(iter/s)": 0.029692 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.75, "completions/mean_length": 77.03125190734863, "completions/min_length": 48.5, "epoch": 1.8756515264333582, "grad_norm": 0.0053794965846959125, "kl": 0.41552734375, "learning_rate": 9.227408239260549e-07, "loss": 0.0004158782248850912, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1259, "train_speed(iter/s)": 0.029687 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.25, "completions/mean_length": 79.40625190734863, "completions/min_length": 47.75, "epoch": 1.8771407297096054, "grad_norm": 1.421860306851593, "kl": 0.40283203125, "learning_rate": 9.226144875325549e-07, "loss": -0.0030532963573932648, "memory(GiB)": 112.53, "reward": 1.697916716337204, "reward_std": 0.102054663002491, "rewards/CineAccuracyORM/mean": 0.708333358168602, "rewards/CineAccuracyORM/std": 0.4190470837056637, "rewards/Format/mean": 0.9895833432674408, "rewards/Format/std": 0.05103103443980217, "step": 1260, "train_speed(iter/s)": 0.02969 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.5, "completions/mean_length": 78.34375381469727, "completions/min_length": 47.5, "epoch": 1.8786299329858527, "grad_norm": 0.8333447450950348, "kl": 0.39599609375, "learning_rate": 9.224880565923547e-07, "loss": -0.0007962223025970161, "memory(GiB)": 112.53, "reward": 1.7187500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500074505806, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1261, "train_speed(iter/s)": 0.029692 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.75, "completions/mean_length": 79.64583587646484, "completions/min_length": 49.0, "epoch": 1.8801191362621, "grad_norm": 1.3219214814992128, "kl": 0.400390625, "learning_rate": 9.223615311337395e-07, "loss": -0.002422524616122246, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.5833333386108279, "rewards/CineAccuracyORM/std": 0.2950507290661335, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1262, "train_speed(iter/s)": 0.029694 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.25, "completions/mean_length": 75.31250190734863, "completions/min_length": 47.5, "epoch": 1.881608339538347, "grad_norm": 2.0016961795549553, "kl": 0.4111328125, "learning_rate": 9.22234911185015e-07, "loss": -0.0022075052838772535, "memory(GiB)": 112.53, "reward": 1.6979166865348816, "reward_std": 0.1322161816060543, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.3590897470712662, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1263, "train_speed(iter/s)": 0.029696 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.5, "completions/mean_length": 74.68750190734863, "completions/min_length": 44.75, "epoch": 1.8830975428145942, "grad_norm": 0.7187192279941134, "kl": 0.3779296875, "learning_rate": 9.221081967745088e-07, "loss": 0.007665669079869986, "memory(GiB)": 112.53, "reward": 1.7395833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1264, "train_speed(iter/s)": 0.029699 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.5, "completions/mean_length": 73.19791984558105, "completions/min_length": 44.25, "epoch": 1.8845867460908414, "grad_norm": 0.7675226385496178, "kl": 0.416015625, "learning_rate": 9.219813879305691e-07, "loss": -0.0017812536098062992, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1265, "train_speed(iter/s)": 0.029689 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.0, "completions/mean_length": 77.68750190734863, "completions/min_length": 50.0, "epoch": 1.8860759493670884, "grad_norm": 0.7427836814510924, "kl": 0.412109375, "learning_rate": 9.218544846815655e-07, "loss": 0.010664355009794235, "memory(GiB)": 112.53, "reward": 1.5937500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500102445483, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1266, "train_speed(iter/s)": 0.029695 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.25, "completions/mean_length": 78.36458587646484, "completions/min_length": 43.25, "epoch": 1.8875651526433357, "grad_norm": 0.00563017059123379, "kl": 0.4072265625, "learning_rate": 9.217274870558882e-07, "loss": 0.00040661715320311487, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1267, "train_speed(iter/s)": 0.029697 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.0, "completions/mean_length": 73.69791793823242, "completions/min_length": 46.75, "epoch": 1.889054355919583, "grad_norm": 0.005384627472638651, "kl": 0.40234375, "learning_rate": 9.216003950819495e-07, "loss": 0.00040187439299188554, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1268, "train_speed(iter/s)": 0.029699 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 116.25, "completions/mean_length": 75.54166984558105, "completions/min_length": 46.5, "epoch": 1.8905435591958302, "grad_norm": 1.01385727633635, "kl": 0.400390625, "learning_rate": 9.214732087881818e-07, "loss": 0.000962178863119334, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.3676535338163376, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1269, "train_speed(iter/s)": 0.029701 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.75, "completions/mean_length": 73.25000381469727, "completions/min_length": 47.75, "epoch": 1.8920327624720774, "grad_norm": 0.00605820198929566, "kl": 0.42333984375, "learning_rate": 9.213459282030393e-07, "loss": 0.00042371911695227027, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1270, "train_speed(iter/s)": 0.029704 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.5, "completions/mean_length": 76.13541793823242, "completions/min_length": 49.5, "epoch": 1.8935219657483247, "grad_norm": 0.005498955519466031, "kl": 0.43017578125, "learning_rate": 9.212185533549969e-07, "loss": 0.0004296647384762764, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1271, "train_speed(iter/s)": 0.029701 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.5, "completions/mean_length": 69.93750381469727, "completions/min_length": 39.75, "epoch": 1.895011169024572, "grad_norm": 2.200715447414677, "kl": 0.42822265625, "learning_rate": 9.210910842725508e-07, "loss": -0.02072247490286827, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.1322161816060543, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1272, "train_speed(iter/s)": 0.029703 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.25, "completions/mean_length": 77.59375190734863, "completions/min_length": 50.5, "epoch": 1.8965003723008191, "grad_norm": 1.242534915659079, "kl": 0.42724609375, "learning_rate": 9.209635209842182e-07, "loss": 0.01705051027238369, "memory(GiB)": 112.53, "reward": 1.8125000298023224, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.2950507290661335, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1273, "train_speed(iter/s)": 0.029691 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 105.25, "completions/mean_length": 74.27083778381348, "completions/min_length": 51.25, "epoch": 1.8979895755770664, "grad_norm": 1.4453396145135846, "kl": 0.419921875, "learning_rate": 9.208358635185372e-07, "loss": -0.0035408507101237774, "memory(GiB)": 112.53, "reward": 1.5000000298023224, "reward_std": 0.08908708393573761, "rewards/CineAccuracyORM/mean": 0.5000000074505806, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1274, "train_speed(iter/s)": 0.029693 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.5, "completions/mean_length": 75.08333587646484, "completions/min_length": 48.5, "epoch": 1.8994787788533136, "grad_norm": 2.552341645220276, "kl": 0.42626953125, "learning_rate": 9.207081119040672e-07, "loss": -0.003043845295906067, "memory(GiB)": 112.53, "reward": 1.7604166865348816, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.1550404578447342, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1275, "train_speed(iter/s)": 0.029696 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.5, "completions/mean_length": 71.11458396911621, "completions/min_length": 47.75, "epoch": 1.9009679821295606, "grad_norm": 0.005753637322914239, "kl": 0.44482421875, "learning_rate": 9.205802661693888e-07, "loss": 0.0004444948863238096, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1276, "train_speed(iter/s)": 0.029705 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 102.5, "completions/mean_length": 72.26041793823242, "completions/min_length": 48.0, "epoch": 1.9024571854058079, "grad_norm": 1.626366880185669, "kl": 0.42724609375, "learning_rate": 9.204523263431032e-07, "loss": 0.006677163299173117, "memory(GiB)": 112.53, "reward": 1.6250000596046448, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.6250000223517418, "rewards/CineAccuracyORM/std": 0.45633094012737274, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1277, "train_speed(iter/s)": 0.029699 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 109.75, "completions/mean_length": 70.52083587646484, "completions/min_length": 43.0, "epoch": 1.9039463886820551, "grad_norm": 0.005545228703874426, "kl": 0.4345703125, "learning_rate": 9.203242924538332e-07, "loss": 0.0004343357286415994, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1278, "train_speed(iter/s)": 0.0297 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.0, "completions/mean_length": 78.61458587646484, "completions/min_length": 48.75, "epoch": 1.9054355919583021, "grad_norm": 0.8703785907442229, "kl": 0.4140625, "learning_rate": 9.20196164530222e-07, "loss": 0.006196999456733465, "memory(GiB)": 112.53, "reward": 1.7083333730697632, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7083333507180214, "rewards/CineAccuracyORM/std": 0.33594508469104767, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1279, "train_speed(iter/s)": 0.029703 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.75, "completions/mean_length": 79.70833587646484, "completions/min_length": 48.25, "epoch": 1.9069247952345494, "grad_norm": 0.005658913753444138, "kl": 0.42138671875, "learning_rate": 9.200679426009346e-07, "loss": 0.00042173307156190276, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1280, "train_speed(iter/s)": 0.029699 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.25, "completions/mean_length": 74.55208587646484, "completions/min_length": 48.25, "epoch": 1.9084139985107966, "grad_norm": 0.005673551645269277, "kl": 0.4169921875, "learning_rate": 9.199396266946562e-07, "loss": 0.0004169153980910778, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1281, "train_speed(iter/s)": 0.029695 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 108.5, "completions/mean_length": 73.63541793823242, "completions/min_length": 47.75, "epoch": 1.9099032017870439, "grad_norm": 1.6508721111183204, "kl": 0.40771484375, "learning_rate": 9.198112168400937e-07, "loss": -0.007884635590016842, "memory(GiB)": 112.53, "reward": 1.5208333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.5208333432674408, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1282, "train_speed(iter/s)": 0.029699 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 106.5, "completions/mean_length": 69.79166793823242, "completions/min_length": 42.75, "epoch": 1.9113924050632911, "grad_norm": 0.006069580893758423, "kl": 0.44580078125, "learning_rate": 9.19682713065975e-07, "loss": 0.00044580185203813016, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1283, "train_speed(iter/s)": 0.029703 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.0, "completions/mean_length": 79.86458587646484, "completions/min_length": 50.0, "epoch": 1.9128816083395384, "grad_norm": 0.005423472582141253, "kl": 0.39208984375, "learning_rate": 9.195541154010482e-07, "loss": 0.00039162224857136607, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1284, "train_speed(iter/s)": 0.029704 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 113.0, "completions/mean_length": 76.56250381469727, "completions/min_length": 46.75, "epoch": 1.9143708116157856, "grad_norm": 0.0054325516268315275, "kl": 0.427734375, "learning_rate": 9.194254238740835e-07, "loss": 0.0004279778222553432, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1285, "train_speed(iter/s)": 0.029708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.0, "completions/mean_length": 74.00000190734863, "completions/min_length": 48.25, "epoch": 1.9158600148920328, "grad_norm": 1.3591955038493744, "kl": 0.4482421875, "learning_rate": 9.192966385138712e-07, "loss": -0.0011958240065723658, "memory(GiB)": 112.53, "reward": 1.7604166865348816, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.760416679084301, "rewards/CineAccuracyORM/std": 0.2819983549416065, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1286, "train_speed(iter/s)": 0.029697 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.25, "completions/mean_length": 75.13541793823242, "completions/min_length": 43.25, "epoch": 1.91734921816828, "grad_norm": 0.9888134704381822, "kl": 0.40234375, "learning_rate": 9.191677593492233e-07, "loss": -0.004575823899358511, "memory(GiB)": 112.53, "reward": 1.6875000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000149011612, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1287, "train_speed(iter/s)": 0.029699 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 107.0, "completions/mean_length": 71.95833396911621, "completions/min_length": 45.5, "epoch": 1.9188384214445273, "grad_norm": 0.0054273769278898225, "kl": 0.44921875, "learning_rate": 9.190387864089722e-07, "loss": 0.0004486961697693914, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1288, "train_speed(iter/s)": 0.029688 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.5, "completions/mean_length": 74.44791984558105, "completions/min_length": 48.75, "epoch": 1.9203276247207743, "grad_norm": 0.0055464446022140115, "kl": 0.3955078125, "learning_rate": 9.189097197219716e-07, "loss": 0.00039542405284009874, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1289, "train_speed(iter/s)": 0.02969 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.0, "completions/mean_length": 76.01041984558105, "completions/min_length": 49.75, "epoch": 1.9218168279970216, "grad_norm": 1.6499256699461475, "kl": 0.404296875, "learning_rate": 9.187805593170963e-07, "loss": -0.0029002726078033447, "memory(GiB)": 112.53, "reward": 1.4375000596046448, "reward_std": 0.09750140458345413, "rewards/CineAccuracyORM/mean": 0.43750001303851604, "rewards/CineAccuracyORM/std": 0.43174003064632416, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1290, "train_speed(iter/s)": 0.029693 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 114.25, "completions/mean_length": 71.64583587646484, "completions/min_length": 49.5, "epoch": 1.9233060312732688, "grad_norm": 1.6179518737539513, "kl": 0.44970703125, "learning_rate": 9.186513052232416e-07, "loss": 0.007417020853608847, "memory(GiB)": 112.53, "reward": 1.96875, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.96875, "rewards/CineAccuracyORM/std": 0.08445799350738525, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1291, "train_speed(iter/s)": 0.029689 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.25, "completions/mean_length": 73.09375190734863, "completions/min_length": 44.75, "epoch": 1.9247952345495158, "grad_norm": 0.006525947024338625, "kl": 0.4375, "learning_rate": 9.185219574693241e-07, "loss": 0.0004362921172287315, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1292, "train_speed(iter/s)": 0.029684 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.0, "completions/mean_length": 77.50000190734863, "completions/min_length": 46.5, "epoch": 1.926284437825763, "grad_norm": 0.7413332789265197, "kl": 0.41796875, "learning_rate": 9.183925160842816e-07, "loss": -0.0002666788350325078, "memory(GiB)": 112.53, "reward": 1.760416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166865348816, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1293, "train_speed(iter/s)": 0.02968 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 118.75, "completions/mean_length": 71.72916984558105, "completions/min_length": 47.75, "epoch": 1.9277736411020103, "grad_norm": 2.6370308084562746, "kl": 0.43603515625, "learning_rate": 9.182629810970721e-07, "loss": -0.0007615220965817571, "memory(GiB)": 112.53, "reward": 1.4895833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.4895833432674408, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1294, "train_speed(iter/s)": 0.029664 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 114.5, "completions/mean_length": 74.97916793823242, "completions/min_length": 47.5, "epoch": 1.9292628443782576, "grad_norm": 0.9474135791368529, "kl": 0.4140625, "learning_rate": 9.181333525366754e-07, "loss": 0.006667240522801876, "memory(GiB)": 112.53, "reward": 1.4375000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.4375000149011612, "rewards/CineAccuracyORM/std": 0.4870600998401642, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1295, "train_speed(iter/s)": 0.029662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.5, "completions/mean_length": 75.87500381469727, "completions/min_length": 50.75, "epoch": 1.9307520476545048, "grad_norm": 1.6455165889795331, "kl": 0.427734375, "learning_rate": 9.180036304320915e-07, "loss": -0.012690544128417969, "memory(GiB)": 112.53, "reward": 1.6979166865348816, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.697916679084301, "rewards/CineAccuracyORM/std": 0.3470437303185463, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1296, "train_speed(iter/s)": 0.02966 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.0, "completions/mean_length": 78.31250381469727, "completions/min_length": 46.5, "epoch": 1.932241250930752, "grad_norm": 0.0058008032642031, "kl": 0.41455078125, "learning_rate": 9.178738148123417e-07, "loss": 0.00041431706631556153, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1297, "train_speed(iter/s)": 0.02966 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.5, "completions/mean_length": 74.84375190734863, "completions/min_length": 44.75, "epoch": 1.9337304542069993, "grad_norm": 1.433198317821365, "kl": 0.42919921875, "learning_rate": 9.177439057064682e-07, "loss": 0.0022442396730184555, "memory(GiB)": 112.53, "reward": 1.7291666865348816, "reward_std": 0.11116771958768368, "rewards/CineAccuracyORM/mean": 0.7291666716337204, "rewards/CineAccuracyORM/std": 0.43905915319919586, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1298, "train_speed(iter/s)": 0.029655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 114.0, "completions/mean_length": 70.97916984558105, "completions/min_length": 43.0, "epoch": 1.9352196574832465, "grad_norm": 1.0515369058179829, "kl": 0.44091796875, "learning_rate": 9.17613903143534e-07, "loss": -0.0017052225302904844, "memory(GiB)": 112.53, "reward": 1.6458333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6458333507180214, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1299, "train_speed(iter/s)": 0.029657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.5, "completions/mean_length": 77.29166984558105, "completions/min_length": 48.5, "epoch": 1.9367088607594938, "grad_norm": 1.5818288888292922, "kl": 0.4150390625, "learning_rate": 9.174838071526233e-07, "loss": -0.009570988826453686, "memory(GiB)": 112.53, "reward": 1.604166716337204, "reward_std": 0.09750140458345413, "rewards/CineAccuracyORM/mean": 0.6041666818782687, "rewards/CineAccuracyORM/std": 0.3330293893814087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1300, "train_speed(iter/s)": 0.029666 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.5, "completions/mean_length": 77.95833587646484, "completions/min_length": 51.25, "epoch": 1.938198064035741, "grad_norm": 0.00527884463016777, "kl": 0.4228515625, "learning_rate": 9.173536177628408e-07, "loss": 0.00042316626058891416, "memory(GiB)": 112.53, "reward": 1.416666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.416666679084301, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1301, "train_speed(iter/s)": 0.029674 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.5, "completions/mean_length": 76.28125286102295, "completions/min_length": 44.0, "epoch": 1.939687267311988, "grad_norm": 1.880086734172538, "kl": 0.42626953125, "learning_rate": 9.172233350033122e-07, "loss": -0.007867304608225822, "memory(GiB)": 112.53, "reward": 1.5520833730697632, "reward_std": 0.07400632463395596, "rewards/CineAccuracyORM/mean": 0.5520833507180214, "rewards/CineAccuracyORM/std": 0.4845366030931473, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1302, "train_speed(iter/s)": 0.029676 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.5, "completions/mean_length": 79.67708396911621, "completions/min_length": 46.0, "epoch": 1.9411764705882353, "grad_norm": 1.4466431686600423, "kl": 0.3916015625, "learning_rate": 9.170929589031846e-07, "loss": -0.012291970662772655, "memory(GiB)": 112.53, "reward": 1.6875000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000074505806, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1303, "train_speed(iter/s)": 0.029676 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.75, "completions/mean_length": 77.30208587646484, "completions/min_length": 47.25, "epoch": 1.9426656738644825, "grad_norm": 0.005098135764322635, "kl": 0.40380859375, "learning_rate": 9.169624894916251e-07, "loss": 0.0004043503140565008, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1304, "train_speed(iter/s)": 0.029684 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 116.0, "completions/mean_length": 74.88541793823242, "completions/min_length": 50.5, "epoch": 1.9441548771407298, "grad_norm": 0.00556813396504674, "kl": 0.396484375, "learning_rate": 9.168319267978222e-07, "loss": 0.0003963753115385771, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1305, "train_speed(iter/s)": 0.029692 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.75, "completions/mean_length": 75.80208587646484, "completions/min_length": 47.25, "epoch": 1.9456440804169768, "grad_norm": 0.005758385981789596, "kl": 0.4169921875, "learning_rate": 9.167012708509854e-07, "loss": 0.00041711656376719475, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1306, "train_speed(iter/s)": 0.029696 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.0, "completions/mean_length": 76.32291984558105, "completions/min_length": 48.0, "epoch": 1.947133283693224, "grad_norm": 0.6429579884107367, "kl": 0.39794921875, "learning_rate": 9.165705216803445e-07, "loss": 0.010215602815151215, "memory(GiB)": 112.53, "reward": 1.5937500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500149011612, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1307, "train_speed(iter/s)": 0.029692 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.0, "completions/mean_length": 73.47916984558105, "completions/min_length": 41.5, "epoch": 1.9486224869694713, "grad_norm": 1.7929605516441531, "kl": 0.4013671875, "learning_rate": 9.16439679315151e-07, "loss": -0.009814523160457611, "memory(GiB)": 112.53, "reward": 1.8125, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.2080918326973915, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1308, "train_speed(iter/s)": 0.029701 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 112.0, "completions/mean_length": 74.70833778381348, "completions/min_length": 49.0, "epoch": 1.9501116902457185, "grad_norm": 0.014403242141552267, "kl": 0.4130859375, "learning_rate": 9.163087437846764e-07, "loss": 0.00041303629404865205, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1309, "train_speed(iter/s)": 0.029709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.75, "completions/mean_length": 77.26041793823242, "completions/min_length": 45.5, "epoch": 1.9516008935219658, "grad_norm": 0.0056973470511037655, "kl": 0.42626953125, "learning_rate": 9.161777151182135e-07, "loss": 0.00042605967610143125, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1310, "train_speed(iter/s)": 0.0297 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.5, "completions/mean_length": 77.37500190734863, "completions/min_length": 46.0, "epoch": 1.953090096798213, "grad_norm": 1.0559834327005477, "kl": 0.4111328125, "learning_rate": 9.160465933450761e-07, "loss": 0.003003329038619995, "memory(GiB)": 112.53, "reward": 1.4895833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.4895833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1311, "train_speed(iter/s)": 0.029708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.75, "completions/mean_length": 80.40625381469727, "completions/min_length": 51.0, "epoch": 1.9545793000744602, "grad_norm": 0.00672594046101192, "kl": 0.37548828125, "learning_rate": 9.159153784945981e-07, "loss": 0.0003760959953069687, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1312, "train_speed(iter/s)": 0.02971 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.5, "completions/mean_length": 76.40625190734863, "completions/min_length": 48.75, "epoch": 1.9560685033507075, "grad_norm": 2.360475324709955, "kl": 0.43603515625, "learning_rate": 9.157840705961348e-07, "loss": 0.006695447489619255, "memory(GiB)": 112.53, "reward": 1.354166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.35416667722165585, "rewards/CineAccuracyORM/std": 0.43174003064632416, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1313, "train_speed(iter/s)": 0.029707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.0, "completions/mean_length": 79.96875381469727, "completions/min_length": 50.5, "epoch": 1.9575577066269547, "grad_norm": 0.006932930512116898, "kl": 0.416015625, "learning_rate": 9.156526696790626e-07, "loss": 0.00041638570837676525, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1314, "train_speed(iter/s)": 0.029702 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.25, "completions/mean_length": 77.40625381469727, "completions/min_length": 49.5, "epoch": 1.9590469099032017, "grad_norm": 0.005873045785612528, "kl": 0.38037109375, "learning_rate": 9.155211757727781e-07, "loss": 0.00038016968755982816, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1315, "train_speed(iter/s)": 0.02971 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.75, "completions/mean_length": 75.01041984558105, "completions/min_length": 45.5, "epoch": 1.960536113179449, "grad_norm": 0.00596125489127769, "kl": 0.42578125, "learning_rate": 9.153895889066987e-07, "loss": 0.00042618514271453023, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1316, "train_speed(iter/s)": 0.02971 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.5, "completions/mean_length": 79.07291793823242, "completions/min_length": 48.5, "epoch": 1.9620253164556962, "grad_norm": 0.006610946364678635, "kl": 0.396484375, "learning_rate": 9.15257909110263e-07, "loss": 0.000396043062210083, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1317, "train_speed(iter/s)": 0.029712 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.0, "completions/mean_length": 80.18750190734863, "completions/min_length": 46.25, "epoch": 1.9635145197319435, "grad_norm": 1.1643143464064978, "kl": 0.39404296875, "learning_rate": 9.151261364129303e-07, "loss": 0.00026237923884764314, "memory(GiB)": 112.53, "reward": 1.5208333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.5208333414047956, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1318, "train_speed(iter/s)": 0.029708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.5, "completions/mean_length": 79.33333778381348, "completions/min_length": 54.0, "epoch": 1.9650037230081905, "grad_norm": 1.5867238158848098, "kl": 0.380859375, "learning_rate": 9.149942708441807e-07, "loss": 0.0023000240325927734, "memory(GiB)": 112.53, "reward": 1.8020833432674408, "reward_std": 0.10661446116864681, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.21880721300840378, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1319, "train_speed(iter/s)": 0.029716 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.0, "completions/mean_length": 83.33333587646484, "completions/min_length": 55.25, "epoch": 1.9664929262844377, "grad_norm": 0.0063458452726639965, "kl": 0.3876953125, "learning_rate": 9.148623124335146e-07, "loss": 0.00038809532998129725, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1320, "train_speed(iter/s)": 0.029711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.5, "completions/mean_length": 76.61458587646484, "completions/min_length": 42.75, "epoch": 1.967982129560685, "grad_norm": 0.8691638385708594, "kl": 0.396484375, "learning_rate": 9.147302612104538e-07, "loss": 0.003063372103497386, "memory(GiB)": 112.53, "reward": 1.635416716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.635416679084301, "rewards/CineAccuracyORM/std": 0.4456155598163605, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1321, "train_speed(iter/s)": 0.029712 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.25, "completions/mean_length": 79.64583587646484, "completions/min_length": 48.75, "epoch": 1.9694713328369322, "grad_norm": 0.00507593440132707, "kl": 0.40380859375, "learning_rate": 9.145981172045405e-07, "loss": 0.0004033759469166398, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1322, "train_speed(iter/s)": 0.029705 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.25, "completions/mean_length": 75.25000190734863, "completions/min_length": 48.75, "epoch": 1.9709605361131795, "grad_norm": 0.006903771035608979, "kl": 0.41259765625, "learning_rate": 9.144658804453379e-07, "loss": 0.0004132188332732767, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1323, "train_speed(iter/s)": 0.029706 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 122.5, "completions/mean_length": 77.38541793823242, "completions/min_length": 46.5, "epoch": 1.9724497393894267, "grad_norm": 0.005857510224345884, "kl": 0.42236328125, "learning_rate": 9.143335509624297e-07, "loss": 0.0004227631725370884, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1324, "train_speed(iter/s)": 0.029703 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.25, "completions/mean_length": 75.98958587646484, "completions/min_length": 42.0, "epoch": 1.973938942665674, "grad_norm": 2.118229047782912, "kl": 0.4130859375, "learning_rate": 9.142011287854205e-07, "loss": 0.003228573827072978, "memory(GiB)": 112.53, "reward": 1.447916716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.447916679084301, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1325, "train_speed(iter/s)": 0.0297 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.75, "completions/mean_length": 81.58333587646484, "completions/min_length": 46.0, "epoch": 1.9754281459419212, "grad_norm": 1.6817881875772427, "kl": 0.40234375, "learning_rate": 9.140686139439356e-07, "loss": 0.008192263543605804, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.0876726359128952, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1326, "train_speed(iter/s)": 0.029695 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.0, "completions/mean_length": 81.39583778381348, "completions/min_length": 47.5, "epoch": 1.9769173492181684, "grad_norm": 1.589859625817437, "kl": 0.38818359375, "learning_rate": 9.139360064676211e-07, "loss": -0.000450423511210829, "memory(GiB)": 112.53, "reward": 1.7812500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7812500074505806, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1327, "train_speed(iter/s)": 0.029697 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.25, "completions/mean_length": 77.73958587646484, "completions/min_length": 47.0, "epoch": 1.9784065524944154, "grad_norm": 0.005676128476790925, "kl": 0.41748046875, "learning_rate": 9.138033063861434e-07, "loss": 0.0004170656611677259, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1328, "train_speed(iter/s)": 0.0297 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.5, "completions/mean_length": 75.73958587646484, "completions/min_length": 45.75, "epoch": 1.9798957557706627, "grad_norm": 0.00539557209867939, "kl": 0.40771484375, "learning_rate": 9.136705137291905e-07, "loss": 0.0004078896890860051, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1329, "train_speed(iter/s)": 0.029696 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.75, "completions/mean_length": 75.19791793823242, "completions/min_length": 39.5, "epoch": 1.98138495904691, "grad_norm": 1.5929218522794066, "kl": 0.42529296875, "learning_rate": 9.135376285264702e-07, "loss": 0.0063369981944561005, "memory(GiB)": 112.53, "reward": 1.7812500298023224, "reward_std": 0.102054663002491, "rewards/CineAccuracyORM/mean": 0.7812500149011612, "rewards/CineAccuracyORM/std": 0.2783776558935642, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1330, "train_speed(iter/s)": 0.029692 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 112.25, "completions/mean_length": 76.22916984558105, "completions/min_length": 49.0, "epoch": 1.9828741623231572, "grad_norm": 2.3720209231511813, "kl": 0.41455078125, "learning_rate": 9.134046508077116e-07, "loss": 0.004730654414743185, "memory(GiB)": 112.53, "reward": 1.7083333730697632, "reward_std": 0.12169522047042847, "rewards/CineAccuracyORM/mean": 0.7083333432674408, "rewards/CineAccuracyORM/std": 0.3241734802722931, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1331, "train_speed(iter/s)": 0.0297 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.0, "completions/mean_length": 82.91666793823242, "completions/min_length": 51.5, "epoch": 1.9843633655994042, "grad_norm": 0.005548660986464129, "kl": 0.373046875, "learning_rate": 9.132715806026641e-07, "loss": 0.00037323636934161186, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1332, "train_speed(iter/s)": 0.029688 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.5, "completions/mean_length": 78.71875190734863, "completions/min_length": 48.0, "epoch": 1.9858525688756514, "grad_norm": 0.005518374919050054, "kl": 0.412109375, "learning_rate": 9.13138417941098e-07, "loss": 0.0004124256956856698, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1333, "train_speed(iter/s)": 0.029689 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 114.25, "completions/mean_length": 80.36458587646484, "completions/min_length": 51.0, "epoch": 1.9873417721518987, "grad_norm": 1.0803651588204986, "kl": 0.40087890625, "learning_rate": 9.130051628528045e-07, "loss": -0.0009294860064983368, "memory(GiB)": 112.53, "reward": 1.6145833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6145833432674408, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1334, "train_speed(iter/s)": 0.029686 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 119.5, "completions/mean_length": 80.06250381469727, "completions/min_length": 43.5, "epoch": 1.988830975428146, "grad_norm": 0.006049173791245826, "kl": 0.392578125, "learning_rate": 9.12871815367595e-07, "loss": 0.00039331111474893987, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1335, "train_speed(iter/s)": 0.029682 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.5, "completions/mean_length": 81.97916793823242, "completions/min_length": 49.5, "epoch": 1.9903201787043932, "grad_norm": 1.9381463196123743, "kl": 0.3984375, "learning_rate": 9.127383755153017e-07, "loss": -0.0019281212007626891, "memory(GiB)": 112.53, "reward": 1.8750000298023224, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.8750000149011612, "rewards/CineAccuracyORM/std": 0.2155592292547226, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1336, "train_speed(iter/s)": 0.029678 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.75, "completions/mean_length": 81.76041984558105, "completions/min_length": 48.75, "epoch": 1.9918093819806404, "grad_norm": 2.8758721341791866, "kl": 0.40771484375, "learning_rate": 9.126048433257778e-07, "loss": 0.00956110842525959, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.833333358168602, "rewards/CineAccuracyORM/std": 0.2874932996928692, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1337, "train_speed(iter/s)": 0.029673 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 115.0, "completions/mean_length": 77.13541984558105, "completions/min_length": 50.0, "epoch": 1.9932985852568876, "grad_norm": 0.005853146942322212, "kl": 0.39892578125, "learning_rate": 9.12471218828897e-07, "loss": 0.00039944727905094624, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1338, "train_speed(iter/s)": 0.029676 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.5, "completions/mean_length": 78.79166984558105, "completions/min_length": 47.25, "epoch": 1.9947877885331349, "grad_norm": 0.8266742031316966, "kl": 0.40966796875, "learning_rate": 9.123375020545534e-07, "loss": 0.001888867700472474, "memory(GiB)": 112.53, "reward": 1.6875000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000074505806, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1339, "train_speed(iter/s)": 0.029684 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.75, "completions/mean_length": 83.34375190734863, "completions/min_length": 47.5, "epoch": 1.9962769918093821, "grad_norm": 0.012228071158610094, "kl": 0.39404296875, "learning_rate": 9.122036930326618e-07, "loss": 0.0003941802424378693, "memory(GiB)": 112.53, "reward": 1.416666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.416666679084301, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1340, "train_speed(iter/s)": 0.029685 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.5, "completions/mean_length": 75.85416793823242, "completions/min_length": 50.0, "epoch": 1.9977661950856291, "grad_norm": 0.005111840491589213, "kl": 0.4111328125, "learning_rate": 9.120697917931581e-07, "loss": 0.00041116515058092773, "memory(GiB)": 112.53, "reward": 1.5833333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1341, "train_speed(iter/s)": 0.029681 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.75, "completions/mean_length": 81.59375190734863, "completions/min_length": 47.25, "epoch": 1.9992553983618764, "grad_norm": 5.117842115680345, "kl": 0.40478515625, "learning_rate": 9.119357983659981e-07, "loss": 0.005829465575516224, "memory(GiB)": 112.53, "reward": 1.5520833730697632, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.5520833432674408, "rewards/CineAccuracyORM/std": 0.3307463824748993, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1342, "train_speed(iter/s)": 0.029688 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.0, "completions/mean_length": 81.20833587646484, "completions/min_length": 41.25, "epoch": 2.0014892032762472, "grad_norm": 1.1633955456907121, "kl": 0.38330078125, "learning_rate": 9.11801712781159e-07, "loss": 0.011224708519876003, "memory(GiB)": 112.53, "reward": 1.6562500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500149011612, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1343, "train_speed(iter/s)": 0.029681 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.0, "completions/mean_length": 80.77083587646484, "completions/min_length": 53.0, "epoch": 2.0029784065524945, "grad_norm": 1.4297943075827526, "kl": 0.390625, "learning_rate": 9.11667535068638e-07, "loss": 0.0063718874007463455, "memory(GiB)": 112.53, "reward": 1.7187500298023224, "reward_std": 0.07400632463395596, "rewards/CineAccuracyORM/mean": 0.7187500149011612, "rewards/CineAccuracyORM/std": 0.33919306844472885, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1344, "train_speed(iter/s)": 0.029688 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.0, "completions/mean_length": 81.52083587646484, "completions/min_length": 55.0, "epoch": 2.0044676098287417, "grad_norm": 6.592455135483212, "kl": 0.3935546875, "learning_rate": 9.115332652584533e-07, "loss": 0.00416956190019846, "memory(GiB)": 112.53, "reward": 1.635416716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.635416679084301, "rewards/CineAccuracyORM/std": 0.4456155598163605, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1345, "train_speed(iter/s)": 0.029689 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.0, "completions/mean_length": 79.29166984558105, "completions/min_length": 49.75, "epoch": 2.005956813104989, "grad_norm": 1.1606184848080892, "kl": 0.39208984375, "learning_rate": 9.113989033806433e-07, "loss": -0.005209701135754585, "memory(GiB)": 112.53, "reward": 1.8437500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1346, "train_speed(iter/s)": 0.029687 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.0, "completions/mean_length": 85.69791793823242, "completions/min_length": 54.25, "epoch": 2.007446016381236, "grad_norm": 0.005170310994869013, "kl": 0.37158203125, "learning_rate": 9.112644494652674e-07, "loss": 0.0003708908334374428, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1347, "train_speed(iter/s)": 0.029681 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.25, "completions/mean_length": 79.33333587646484, "completions/min_length": 47.0, "epoch": 2.0089352196574835, "grad_norm": 1.3391936772869009, "kl": 0.388671875, "learning_rate": 9.111299035424056e-07, "loss": 0.014300576411187649, "memory(GiB)": 112.53, "reward": 1.572916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.572916679084301, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1348, "train_speed(iter/s)": 0.029683 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.25, "completions/mean_length": 78.95833587646484, "completions/min_length": 51.5, "epoch": 2.0104244229337302, "grad_norm": 0.005158161876389632, "kl": 0.3974609375, "learning_rate": 9.10995265642158e-07, "loss": 0.0003979573375545442, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1349, "train_speed(iter/s)": 0.029691 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.0, "completions/mean_length": 82.54166984558105, "completions/min_length": 46.5, "epoch": 2.0119136262099775, "grad_norm": 1.6972712106338048, "kl": 0.3974609375, "learning_rate": 9.108605357946458e-07, "loss": -0.007012729998677969, "memory(GiB)": 112.53, "reward": 1.6458333730697632, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.6458333432674408, "rewards/CineAccuracyORM/std": 0.44886354357004166, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1350, "train_speed(iter/s)": 0.029686 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.75, "completions/mean_length": 80.38541793823242, "completions/min_length": 52.5, "epoch": 2.0134028294862247, "grad_norm": 0.870352311987569, "kl": 0.39501953125, "learning_rate": 9.107257140300104e-07, "loss": 0.0013922639191150665, "memory(GiB)": 112.53, "reward": 1.6562500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500223517418, "rewards/CineAccuracyORM/std": 0.4121886007487774, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1351, "train_speed(iter/s)": 0.029687 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.5, "completions/mean_length": 80.36458587646484, "completions/min_length": 54.25, "epoch": 2.014892032762472, "grad_norm": 2.2263084763377123, "kl": 0.38818359375, "learning_rate": 9.105908003784142e-07, "loss": -0.01771257258951664, "memory(GiB)": 112.53, "reward": 1.7083333730697632, "reward_std": 0.1034691073000431, "rewards/CineAccuracyORM/mean": 0.7083333507180214, "rewards/CineAccuracyORM/std": 0.33594508469104767, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1352, "train_speed(iter/s)": 0.029695 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.5, "completions/mean_length": 79.97916793823242, "completions/min_length": 51.5, "epoch": 2.016381236038719, "grad_norm": 0.005795623720819175, "kl": 0.3955078125, "learning_rate": 9.104557948700397e-07, "loss": 0.00039535490213893354, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1353, "train_speed(iter/s)": 0.02969 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 117.0, "completions/mean_length": 79.71875381469727, "completions/min_length": 50.25, "epoch": 2.0178704393149665, "grad_norm": 1.4733110062299661, "kl": 0.3896484375, "learning_rate": 9.103206975350901e-07, "loss": -0.00041706737829372287, "memory(GiB)": 112.53, "reward": 1.5937500596046448, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.5937500149011612, "rewards/CineAccuracyORM/std": 0.49030808359384537, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1354, "train_speed(iter/s)": 0.029692 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.5, "completions/mean_length": 82.77083587646484, "completions/min_length": 53.5, "epoch": 2.0193596425912137, "grad_norm": 5.011537474874332, "kl": 0.36962890625, "learning_rate": 9.101855084037892e-07, "loss": -0.004564403090626001, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1355, "train_speed(iter/s)": 0.029692 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 110.75, "completions/mean_length": 76.78125190734863, "completions/min_length": 47.25, "epoch": 2.020848845867461, "grad_norm": 1.131595078736957, "kl": 0.4267578125, "learning_rate": 9.100502275063814e-07, "loss": 0.012147285044193268, "memory(GiB)": 112.53, "reward": 1.697916716337204, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.6979166865348816, "rewards/CineAccuracyORM/std": 0.46742958575487137, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1356, "train_speed(iter/s)": 0.029688 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.5, "completions/mean_length": 78.20833587646484, "completions/min_length": 43.5, "epoch": 2.022338049143708, "grad_norm": 1.6224566773976805, "kl": 0.41455078125, "learning_rate": 9.099148548731315e-07, "loss": 0.0059060449711978436, "memory(GiB)": 112.53, "reward": 1.760416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166865348816, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1357, "train_speed(iter/s)": 0.02969 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.0, "completions/mean_length": 78.54166984558105, "completions/min_length": 45.25, "epoch": 2.0238272524199554, "grad_norm": 1.9805485831873622, "kl": 0.40283203125, "learning_rate": 9.09779390534325e-07, "loss": 0.014592595398426056, "memory(GiB)": 112.53, "reward": 1.4895833730697632, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.48958334140479565, "rewards/CineAccuracyORM/std": 0.3182126581668854, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1358, "train_speed(iter/s)": 0.029692 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.5, "completions/mean_length": 80.82291793823242, "completions/min_length": 48.5, "epoch": 2.0253164556962027, "grad_norm": 0.9202708619789305, "kl": 0.4140625, "learning_rate": 9.096438345202675e-07, "loss": -0.001908156438730657, "memory(GiB)": 112.53, "reward": 1.6145833432674408, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6145833358168602, "rewards/CineAccuracyORM/std": 0.12724433839321136, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1359, "train_speed(iter/s)": 0.029691 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.75, "completions/mean_length": 81.29166793823242, "completions/min_length": 47.25, "epoch": 2.02680565897245, "grad_norm": 0.8512673245020616, "kl": 0.40234375, "learning_rate": 9.095081868612855e-07, "loss": -0.006662644445896149, "memory(GiB)": 112.53, "reward": 1.6875000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000149011612, "rewards/CineAccuracyORM/std": 0.4717390313744545, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1360, "train_speed(iter/s)": 0.029692 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.25, "completions/mean_length": 80.77083778381348, "completions/min_length": 48.5, "epoch": 2.028294862248697, "grad_norm": 5.140283701971083, "kl": 0.3828125, "learning_rate": 9.09372447587726e-07, "loss": -0.005167446099221706, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833507180214, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1361, "train_speed(iter/s)": 0.029693 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 114.5, "completions/mean_length": 80.22916984558105, "completions/min_length": 44.75, "epoch": 2.029784065524944, "grad_norm": 0.0087544738486441, "kl": 0.41357421875, "learning_rate": 9.092366167299565e-07, "loss": 0.00041352974949404597, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1362, "train_speed(iter/s)": 0.029682 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.25, "completions/mean_length": 84.61458587646484, "completions/min_length": 53.25, "epoch": 2.031273268801191, "grad_norm": 3.7342108834812318, "kl": 0.3671875, "learning_rate": 9.091006943183645e-07, "loss": 0.005177910439670086, "memory(GiB)": 112.53, "reward": 1.5520834028720856, "reward_std": 0.11258216388523579, "rewards/CineAccuracyORM/mean": 0.5520833544433117, "rewards/CineAccuracyORM/std": 0.39249279722571373, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1363, "train_speed(iter/s)": 0.029683 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.5, "completions/mean_length": 79.69791984558105, "completions/min_length": 47.25, "epoch": 2.0327624720774384, "grad_norm": 1.6654743379540802, "kl": 0.3798828125, "learning_rate": 9.089646803833588e-07, "loss": -0.02766135334968567, "memory(GiB)": 112.53, "reward": 1.572916716337204, "reward_std": 0.14659820310771465, "rewards/CineAccuracyORM/mean": 0.5729166865348816, "rewards/CineAccuracyORM/std": 0.4121886007487774, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1364, "train_speed(iter/s)": 0.029691 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 116.5, "completions/mean_length": 77.04167175292969, "completions/min_length": 49.25, "epoch": 2.0342516753536857, "grad_norm": 2.741625343145304, "kl": 0.39306640625, "learning_rate": 9.088285749553679e-07, "loss": -0.0036065762396901846, "memory(GiB)": 112.53, "reward": 1.635416716337204, "reward_std": 0.06803862750530243, "rewards/CineAccuracyORM/mean": 0.635416679084301, "rewards/CineAccuracyORM/std": 0.4456155598163605, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1365, "train_speed(iter/s)": 0.029693 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.0, "completions/mean_length": 79.79166984558105, "completions/min_length": 50.5, "epoch": 2.035740878629933, "grad_norm": 3.7390993937949846, "kl": 0.38671875, "learning_rate": 9.086923780648412e-07, "loss": 0.011187325231730938, "memory(GiB)": 112.53, "reward": 1.7291666865348816, "reward_std": 0.11713541857898235, "rewards/CineAccuracyORM/mean": 0.729166679084301, "rewards/CineAccuracyORM/std": 0.2950507290661335, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1366, "train_speed(iter/s)": 0.029695 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.5, "completions/mean_length": 82.69791984558105, "completions/min_length": 51.0, "epoch": 2.03723008190618, "grad_norm": 1.7271945591940367, "kl": 0.38330078125, "learning_rate": 9.085560897422487e-07, "loss": -0.01629013940691948, "memory(GiB)": 112.53, "reward": 1.6562500596046448, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.6562500102445483, "rewards/CineAccuracyORM/std": 0.24199935421347618, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1367, "train_speed(iter/s)": 0.0297 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.5, "completions/mean_length": 80.85416793823242, "completions/min_length": 46.25, "epoch": 2.0387192851824274, "grad_norm": 0.004961349076471462, "kl": 0.3818359375, "learning_rate": 9.084197100180803e-07, "loss": 0.00038209452759474516, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1368, "train_speed(iter/s)": 0.029695 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.25, "completions/mean_length": 79.72916793823242, "completions/min_length": 53.0, "epoch": 2.0402084884586746, "grad_norm": 0.005264602249455206, "kl": 0.39794921875, "learning_rate": 9.082832389228469e-07, "loss": 0.00039809438749216497, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1369, "train_speed(iter/s)": 0.029686 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.0, "completions/mean_length": 85.97916984558105, "completions/min_length": 49.0, "epoch": 2.041697691734922, "grad_norm": 0.9145150851855162, "kl": 0.38134765625, "learning_rate": 9.081466764870794e-07, "loss": -0.005766267888247967, "memory(GiB)": 112.53, "reward": 1.8645833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8645833432674408, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1370, "train_speed(iter/s)": 0.029681 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.25, "completions/mean_length": 80.85416984558105, "completions/min_length": 51.0, "epoch": 2.043186895011169, "grad_norm": 1.1088875121781012, "kl": 0.41162109375, "learning_rate": 9.080100227413296e-07, "loss": 0.0030182660557329655, "memory(GiB)": 112.53, "reward": 1.8437500298023224, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.2819983549416065, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1371, "train_speed(iter/s)": 0.029664 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.5, "completions/mean_length": 82.28125381469727, "completions/min_length": 51.25, "epoch": 2.0446760982874164, "grad_norm": 1.2378427380237447, "kl": 0.3896484375, "learning_rate": 9.078732777161693e-07, "loss": 0.003942016512155533, "memory(GiB)": 112.53, "reward": 1.7812500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7812500074505806, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1372, "train_speed(iter/s)": 0.029672 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.0, "completions/mean_length": 79.04166984558105, "completions/min_length": 48.25, "epoch": 2.0461653015636636, "grad_norm": 1.625273860842892, "kl": 0.40576171875, "learning_rate": 9.077364414421909e-07, "loss": 0.004788368009030819, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.11116772517561913, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.3320881873369217, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1373, "train_speed(iter/s)": 0.029666 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.5, "completions/mean_length": 87.66666984558105, "completions/min_length": 56.0, "epoch": 2.047654504839911, "grad_norm": 0.004727914423896754, "kl": 0.373046875, "learning_rate": 9.075995139500072e-07, "loss": 0.0003728731535375118, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1374, "train_speed(iter/s)": 0.029668 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.25, "completions/mean_length": 78.96875, "completions/min_length": 50.25, "epoch": 2.0491437081161576, "grad_norm": 0.005245430129239233, "kl": 0.39501953125, "learning_rate": 9.074624952702517e-07, "loss": 0.0003947994555346668, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1375, "train_speed(iter/s)": 0.029675 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.5, "completions/mean_length": 81.15625190734863, "completions/min_length": 50.25, "epoch": 2.050632911392405, "grad_norm": 0.8531812367325161, "kl": 0.390625, "learning_rate": 9.073253854335776e-07, "loss": 0.002951804082840681, "memory(GiB)": 112.53, "reward": 1.8437500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1376, "train_speed(iter/s)": 0.029677 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.5, "completions/mean_length": 79.75000190734863, "completions/min_length": 46.5, "epoch": 2.052122114668652, "grad_norm": 1.5825770427778427, "kl": 0.42626953125, "learning_rate": 9.071881844706592e-07, "loss": 0.017540423199534416, "memory(GiB)": 112.53, "reward": 1.6979166865348816, "reward_std": 0.07400632463395596, "rewards/CineAccuracyORM/mean": 0.697916679084301, "rewards/CineAccuracyORM/std": 0.29910537227988243, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1377, "train_speed(iter/s)": 0.02968 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.5, "completions/mean_length": 84.22916984558105, "completions/min_length": 48.5, "epoch": 2.0536113179448994, "grad_norm": 0.005316025732617617, "kl": 0.38232421875, "learning_rate": 9.070508924121909e-07, "loss": 0.000382622005417943, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1378, "train_speed(iter/s)": 0.029681 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.25, "completions/mean_length": 79.82291984558105, "completions/min_length": 51.25, "epoch": 2.0551005212211466, "grad_norm": 0.7842307948520574, "kl": 0.4013671875, "learning_rate": 9.069135092888873e-07, "loss": 0.0008892063051462173, "memory(GiB)": 112.53, "reward": 1.7187500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500074505806, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1379, "train_speed(iter/s)": 0.029688 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.75, "completions/mean_length": 82.02083778381348, "completions/min_length": 53.25, "epoch": 2.056589724497394, "grad_norm": 0.0055105347617314445, "kl": 0.396484375, "learning_rate": 9.067760351314837e-07, "loss": 0.00039561683661304414, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1380, "train_speed(iter/s)": 0.029685 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.5, "completions/mean_length": 78.83333587646484, "completions/min_length": 52.0, "epoch": 2.058078927773641, "grad_norm": 0.8444500669940853, "kl": 0.39892578125, "learning_rate": 9.066384699707357e-07, "loss": 0.00016641031834296882, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833507180214, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1381, "train_speed(iter/s)": 0.029692 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.75, "completions/mean_length": 82.71875190734863, "completions/min_length": 46.5, "epoch": 2.0595681310498883, "grad_norm": 1.4160338331844469, "kl": 0.3837890625, "learning_rate": 9.065008138374188e-07, "loss": 0.0025237216614186764, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.11116772517561913, "rewards/CineAccuracyORM/mean": 0.7812500149011612, "rewards/CineAccuracyORM/std": 0.39581216871738434, "rewards/Format/mean": 0.9895833432674408, "rewards/Format/std": 0.05103103443980217, "step": 1382, "train_speed(iter/s)": 0.029688 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.75, "completions/mean_length": 84.51041793823242, "completions/min_length": 52.5, "epoch": 2.0610573343261356, "grad_norm": 0.71801348408408, "kl": 0.36767578125, "learning_rate": 9.063630667623296e-07, "loss": 0.0001872691500466317, "memory(GiB)": 112.53, "reward": 1.4479166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.4479166716337204, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1383, "train_speed(iter/s)": 0.029685 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.0, "completions/mean_length": 74.58333587646484, "completions/min_length": 42.5, "epoch": 2.062546537602383, "grad_norm": 0.00538857166680066, "kl": 0.43359375, "learning_rate": 9.062252287762846e-07, "loss": 0.0004336638958193362, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1384, "train_speed(iter/s)": 0.029687 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.5, "completions/mean_length": 79.57291793823242, "completions/min_length": 51.0, "epoch": 2.06403574087863, "grad_norm": 0.792575535063511, "kl": 0.39013671875, "learning_rate": 9.060872999101206e-07, "loss": -0.007983945310115814, "memory(GiB)": 112.53, "reward": 1.6458333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6458333507180214, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1385, "train_speed(iter/s)": 0.029695 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.25, "completions/mean_length": 82.69791984558105, "completions/min_length": 47.75, "epoch": 2.0655249441548773, "grad_norm": 0.005194325675588262, "kl": 0.40234375, "learning_rate": 9.059492801946949e-07, "loss": 0.000402454927098006, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1386, "train_speed(iter/s)": 0.029683 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.5, "completions/mean_length": 82.33333778381348, "completions/min_length": 49.25, "epoch": 2.0670141474311245, "grad_norm": 0.6798002090638813, "kl": 0.3916015625, "learning_rate": 9.058111696608852e-07, "loss": 0.0035371992271393538, "memory(GiB)": 112.53, "reward": 1.5937500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500074505806, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1387, "train_speed(iter/s)": 0.029677 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.25, "completions/mean_length": 86.53125190734863, "completions/min_length": 49.0, "epoch": 2.0685033507073713, "grad_norm": 0.005320682837210845, "kl": 0.39990234375, "learning_rate": 9.056729683395892e-07, "loss": 0.000399331736844033, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1388, "train_speed(iter/s)": 0.029677 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.75, "completions/mean_length": 83.26041984558105, "completions/min_length": 51.0, "epoch": 2.0699925539836186, "grad_norm": 0.0052654917660061576, "kl": 0.40087890625, "learning_rate": 9.055346762617251e-07, "loss": 0.0004011731070932001, "memory(GiB)": 112.53, "reward": 1.5000000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1389, "train_speed(iter/s)": 0.029665 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.75, "completions/mean_length": 87.71875190734863, "completions/min_length": 53.5, "epoch": 2.071481757259866, "grad_norm": 0.7918631348954893, "kl": 0.3818359375, "learning_rate": 9.053962934582315e-07, "loss": 0.00321391224861145, "memory(GiB)": 112.53, "reward": 1.6562500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500074505806, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1390, "train_speed(iter/s)": 0.029661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.25, "completions/mean_length": 84.94791984558105, "completions/min_length": 43.0, "epoch": 2.072970960536113, "grad_norm": 0.004935509896967443, "kl": 0.40087890625, "learning_rate": 9.052578199600673e-07, "loss": 0.00040065578650683165, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1391, "train_speed(iter/s)": 0.029659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.0, "completions/mean_length": 89.06250381469727, "completions/min_length": 54.75, "epoch": 2.0744601638123603, "grad_norm": 0.0049224494106670106, "kl": 0.39111328125, "learning_rate": 9.051192557982114e-07, "loss": 0.0003903489268850535, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1392, "train_speed(iter/s)": 0.029664 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.5, "completions/mean_length": 85.50000190734863, "completions/min_length": 53.0, "epoch": 2.0759493670886076, "grad_norm": 1.5951331139106004, "kl": 0.38720703125, "learning_rate": 9.049806010036633e-07, "loss": -0.0022160294465720654, "memory(GiB)": 112.53, "reward": 1.5833333432674408, "reward_std": 0.08908708393573761, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.2228618562221527, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1393, "train_speed(iter/s)": 0.029654 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.0, "completions/mean_length": 87.60416793823242, "completions/min_length": 48.25, "epoch": 2.077438570364855, "grad_norm": 0.0053035409017862705, "kl": 0.3935546875, "learning_rate": 9.048418556074424e-07, "loss": 0.0003938900772482157, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1394, "train_speed(iter/s)": 0.029649 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.75, "completions/mean_length": 89.16666984558105, "completions/min_length": 53.25, "epoch": 2.078927773641102, "grad_norm": 1.0486677091929375, "kl": 0.37841796875, "learning_rate": 9.047030196405889e-07, "loss": 0.0014092964120209217, "memory(GiB)": 112.53, "reward": 1.7083333730697632, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7083333432674408, "rewards/CineAccuracyORM/std": 0.36846019327640533, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1395, "train_speed(iter/s)": 0.029656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.5, "completions/mean_length": 92.86458587646484, "completions/min_length": 44.5, "epoch": 2.0804169769173493, "grad_norm": 0.004978268595919744, "kl": 0.37158203125, "learning_rate": 9.04564093134163e-07, "loss": 0.00037181968218646944, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1396, "train_speed(iter/s)": 0.029658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.75, "completions/mean_length": 88.16666984558105, "completions/min_length": 56.0, "epoch": 2.0819061801935965, "grad_norm": 1.5209471322732797, "kl": 0.375, "learning_rate": 9.04425076119245e-07, "loss": 0.0019203117117285728, "memory(GiB)": 112.53, "reward": 1.4166666865348816, "reward_std": 0.11116771958768368, "rewards/CineAccuracyORM/mean": 0.416666679084301, "rewards/CineAccuracyORM/std": 0.45438022166490555, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1397, "train_speed(iter/s)": 0.029659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.5, "completions/mean_length": 86.35416984558105, "completions/min_length": 52.25, "epoch": 2.0833953834698438, "grad_norm": 0.6223972999953228, "kl": 0.388671875, "learning_rate": 9.042859686269356e-07, "loss": -0.0018276867922395468, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.822916679084301, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1398, "train_speed(iter/s)": 0.02966 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.5, "completions/mean_length": 84.93750190734863, "completions/min_length": 51.25, "epoch": 2.084884586746091, "grad_norm": 0.9093721707943153, "kl": 0.36669921875, "learning_rate": 9.041467706883556e-07, "loss": 0.013012843206524849, "memory(GiB)": 112.53, "reward": 1.7395833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1399, "train_speed(iter/s)": 0.029667 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.5, "completions/mean_length": 92.21875190734863, "completions/min_length": 59.75, "epoch": 2.0863737900223382, "grad_norm": 1.322190309281695, "kl": 0.37890625, "learning_rate": 9.040074823346464e-07, "loss": 0.01855899766087532, "memory(GiB)": 112.53, "reward": 1.6250000298023224, "reward_std": 0.0725918859243393, "rewards/CineAccuracyORM/mean": 0.6250000149011612, "rewards/CineAccuracyORM/std": 0.33594508469104767, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1400, "train_speed(iter/s)": 0.029668 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.0, "completions/mean_length": 88.62500190734863, "completions/min_length": 53.5, "epoch": 2.087862993298585, "grad_norm": 1.345710841592954, "kl": 0.37890625, "learning_rate": 9.038681035969694e-07, "loss": 0.010365206748247147, "memory(GiB)": 112.53, "reward": 1.6562500596046448, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.6562500149011612, "rewards/CineAccuracyORM/std": 0.4785975143313408, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1401, "train_speed(iter/s)": 0.029664 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.75, "completions/mean_length": 82.92708587646484, "completions/min_length": 54.5, "epoch": 2.0893521965748323, "grad_norm": 0.005612902374792626, "kl": 0.38525390625, "learning_rate": 9.037286345065059e-07, "loss": 0.000385039922548458, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1402, "train_speed(iter/s)": 0.029671 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.75, "completions/mean_length": 83.68750190734863, "completions/min_length": 51.75, "epoch": 2.0908413998510795, "grad_norm": 0.7981695454736982, "kl": 0.369140625, "learning_rate": 9.035890750944581e-07, "loss": -0.010303066112101078, "memory(GiB)": 112.53, "reward": 1.8958333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1403, "train_speed(iter/s)": 0.029673 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.25, "completions/mean_length": 87.20833587646484, "completions/min_length": 51.75, "epoch": 2.0923306031273268, "grad_norm": 1.2043203078249118, "kl": 0.4013671875, "learning_rate": 9.034494253920478e-07, "loss": -0.002941216342151165, "memory(GiB)": 112.53, "reward": 1.5312500596046448, "reward_std": 0.07400632463395596, "rewards/CineAccuracyORM/mean": 0.5312500186264515, "rewards/CineAccuracyORM/std": 0.38697611913084984, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1404, "train_speed(iter/s)": 0.029668 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.25, "completions/mean_length": 83.72916793823242, "completions/min_length": 52.25, "epoch": 2.093819806403574, "grad_norm": 0.9148801855370917, "kl": 0.37158203125, "learning_rate": 9.033096854305171e-07, "loss": 0.0021137616131454706, "memory(GiB)": 112.53, "reward": 1.7812500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7812500149011612, "rewards/CineAccuracyORM/std": 0.34448449313640594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1405, "train_speed(iter/s)": 0.029666 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.0, "completions/mean_length": 88.31250190734863, "completions/min_length": 46.0, "epoch": 2.0953090096798213, "grad_norm": 0.005518498855226458, "kl": 0.35546875, "learning_rate": 9.031698552411289e-07, "loss": 0.00035593187203630805, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1406, "train_speed(iter/s)": 0.029673 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.75, "completions/mean_length": 83.02083587646484, "completions/min_length": 52.75, "epoch": 2.0967982129560685, "grad_norm": 1.3389221171555543, "kl": 0.390625, "learning_rate": 9.030299348551653e-07, "loss": 0.012632779777050018, "memory(GiB)": 112.53, "reward": 1.510416716337204, "reward_std": 0.10661446116864681, "rewards/CineAccuracyORM/mean": 0.5104166865348816, "rewards/CineAccuracyORM/std": 0.47294626384973526, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1407, "train_speed(iter/s)": 0.029675 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.75, "completions/mean_length": 82.87500190734863, "completions/min_length": 48.75, "epoch": 2.0982874162323157, "grad_norm": 0.5782780913720139, "kl": 0.39501953125, "learning_rate": 9.028899243039292e-07, "loss": -0.004168206360191107, "memory(GiB)": 112.53, "reward": 1.822916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1408, "train_speed(iter/s)": 0.029673 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.0, "completions/mean_length": 91.30208396911621, "completions/min_length": 51.0, "epoch": 2.099776619508563, "grad_norm": 0.7947982484926188, "kl": 0.3681640625, "learning_rate": 9.027498236187439e-07, "loss": -0.0067887320183217525, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1409, "train_speed(iter/s)": 0.029668 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.5, "completions/mean_length": 81.20833587646484, "completions/min_length": 47.25, "epoch": 2.1012658227848102, "grad_norm": 0.5969708192751458, "kl": 0.3916015625, "learning_rate": 9.026096328309521e-07, "loss": 0.0028599051292985678, "memory(GiB)": 112.53, "reward": 1.7604166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.760416679084301, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1410, "train_speed(iter/s)": 0.029669 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.0, "completions/mean_length": 83.75000381469727, "completions/min_length": 53.25, "epoch": 2.1027550260610575, "grad_norm": 1.6039129202489324, "kl": 0.365234375, "learning_rate": 9.024693519719173e-07, "loss": 0.0012583646457642317, "memory(GiB)": 112.53, "reward": 1.5937500298023224, "reward_std": 0.11572097800672054, "rewards/CineAccuracyORM/mean": 0.5937500149011612, "rewards/CineAccuracyORM/std": 0.32777874171733856, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1411, "train_speed(iter/s)": 0.02966 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.5, "completions/mean_length": 83.53125381469727, "completions/min_length": 54.5, "epoch": 2.1042442293373047, "grad_norm": 0.005071829110687472, "kl": 0.36767578125, "learning_rate": 9.023289810730229e-07, "loss": 0.0003684100229293108, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1412, "train_speed(iter/s)": 0.029661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.25, "completions/mean_length": 79.29166984558105, "completions/min_length": 51.5, "epoch": 2.105733432613552, "grad_norm": 0.004771736332392131, "kl": 0.380859375, "learning_rate": 9.021885201656725e-07, "loss": 0.00038095167838037014, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1413, "train_speed(iter/s)": 0.029662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.5, "completions/mean_length": 80.92708778381348, "completions/min_length": 52.75, "epoch": 2.1072226358897987, "grad_norm": 0.7866424576167764, "kl": 0.38037109375, "learning_rate": 9.020479692812896e-07, "loss": 0.006748726591467857, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833507180214, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1414, "train_speed(iter/s)": 0.02967 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.25, "completions/mean_length": 84.70833587646484, "completions/min_length": 53.0, "epoch": 2.108711839166046, "grad_norm": 1.7018936220416578, "kl": 0.36328125, "learning_rate": 9.019073284513183e-07, "loss": 0.00920190941542387, "memory(GiB)": 112.53, "reward": 1.6145833730697632, "reward_std": 0.07400632463395596, "rewards/CineAccuracyORM/mean": 0.6145833507180214, "rewards/CineAccuracyORM/std": 0.4595789238810539, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1415, "train_speed(iter/s)": 0.029665 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.5, "completions/mean_length": 84.54166984558105, "completions/min_length": 49.75, "epoch": 2.1102010424422932, "grad_norm": 0.004681823136414329, "kl": 0.38330078125, "learning_rate": 9.017665977072226e-07, "loss": 0.00038342070183716714, "memory(GiB)": 112.53, "reward": 1.416666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.416666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1416, "train_speed(iter/s)": 0.029669 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.75, "completions/mean_length": 82.75000381469727, "completions/min_length": 56.0, "epoch": 2.1116902457185405, "grad_norm": 1.707818302875594, "kl": 0.365234375, "learning_rate": 9.016257770804862e-07, "loss": -0.019787389785051346, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.6770833432674408, "rewards/CineAccuracyORM/std": 0.3699222281575203, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1417, "train_speed(iter/s)": 0.029671 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.75, "completions/mean_length": 88.39583396911621, "completions/min_length": 54.5, "epoch": 2.1131794489947877, "grad_norm": 0.7554367211540876, "kl": 0.369140625, "learning_rate": 9.014848666026138e-07, "loss": 0.00784507766366005, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833507180214, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1418, "train_speed(iter/s)": 0.029666 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.75, "completions/mean_length": 84.29166793823242, "completions/min_length": 48.75, "epoch": 2.114668652271035, "grad_norm": 0.004629017740641183, "kl": 0.37353515625, "learning_rate": 9.013438663051292e-07, "loss": 0.0003737911465577781, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1419, "train_speed(iter/s)": 0.029655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.75, "completions/mean_length": 82.44791984558105, "completions/min_length": 49.5, "epoch": 2.116157855547282, "grad_norm": 0.004329380495416001, "kl": 0.36328125, "learning_rate": 9.01202776219577e-07, "loss": 0.00036301324144005775, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1420, "train_speed(iter/s)": 0.029645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.75, "completions/mean_length": 83.12500381469727, "completions/min_length": 46.0, "epoch": 2.1176470588235294, "grad_norm": 0.00441709010377182, "kl": 0.35693359375, "learning_rate": 9.010615963775219e-07, "loss": 0.00035694692633114755, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1421, "train_speed(iter/s)": 0.029652 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.0, "completions/mean_length": 90.66666793823242, "completions/min_length": 50.75, "epoch": 2.1191362620997767, "grad_norm": 1.0209181455758742, "kl": 0.32470703125, "learning_rate": 9.009203268105481e-07, "loss": -0.0006179859628900886, "memory(GiB)": 112.53, "reward": 1.6979166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1422, "train_speed(iter/s)": 0.029653 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.5, "completions/mean_length": 86.16666984558105, "completions/min_length": 53.0, "epoch": 2.120625465376024, "grad_norm": 1.65045511479482, "kl": 0.34033203125, "learning_rate": 9.007789675502603e-07, "loss": -0.0026596803218126297, "memory(GiB)": 112.53, "reward": 1.7291667461395264, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7291666865348816, "rewards/CineAccuracyORM/std": 0.43174003064632416, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1423, "train_speed(iter/s)": 0.029654 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.25, "completions/mean_length": 88.38541793823242, "completions/min_length": 52.5, "epoch": 2.122114668652271, "grad_norm": 0.9975074765514287, "kl": 0.34814453125, "learning_rate": 9.006375186282832e-07, "loss": 0.013393394649028778, "memory(GiB)": 112.53, "reward": 1.7083333432674408, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7083333432674408, "rewards/CineAccuracyORM/std": 0.09517337381839752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1424, "train_speed(iter/s)": 0.029653 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.25, "completions/mean_length": 85.60416984558105, "completions/min_length": 46.75, "epoch": 2.1236038719285184, "grad_norm": 0.0038857117071666603, "kl": 0.3154296875, "learning_rate": 9.004959800762617e-07, "loss": 0.0003153999277856201, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1425, "train_speed(iter/s)": 0.02966 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.0, "completions/mean_length": 84.18750190734863, "completions/min_length": 47.5, "epoch": 2.1250930752047656, "grad_norm": 1.0753823789096093, "kl": 0.3486328125, "learning_rate": 9.003543519258604e-07, "loss": 0.000704888952895999, "memory(GiB)": 112.53, "reward": 1.635416716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.635416679084301, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1426, "train_speed(iter/s)": 0.029661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.0, "completions/mean_length": 85.66666984558105, "completions/min_length": 47.0, "epoch": 2.1265822784810124, "grad_norm": 1.6583676930528568, "kl": 0.30908203125, "learning_rate": 9.002126342087641e-07, "loss": -0.008445587009191513, "memory(GiB)": 112.53, "reward": 1.729166716337204, "reward_std": 0.1315174512565136, "rewards/CineAccuracyORM/mean": 0.7291666865348816, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1427, "train_speed(iter/s)": 0.029663 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.0, "completions/mean_length": 86.05208587646484, "completions/min_length": 46.25, "epoch": 2.1280714817572597, "grad_norm": 0.00449300536014414, "kl": 0.35498046875, "learning_rate": 9.00070826956678e-07, "loss": 0.0003549584362190217, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1428, "train_speed(iter/s)": 0.029659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.0, "completions/mean_length": 88.41666793823242, "completions/min_length": 57.25, "epoch": 2.129560685033507, "grad_norm": 0.004322891111949263, "kl": 0.333984375, "learning_rate": 8.999289302013268e-07, "loss": 0.0003343512653373182, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1429, "train_speed(iter/s)": 0.02966 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.75, "completions/mean_length": 88.29166984558105, "completions/min_length": 46.75, "epoch": 2.131049888309754, "grad_norm": 0.0041242604100730175, "kl": 0.32177734375, "learning_rate": 8.997869439744554e-07, "loss": 0.00032213670783676207, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1430, "train_speed(iter/s)": 0.029655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.75, "completions/mean_length": 82.60416793823242, "completions/min_length": 32.0, "epoch": 2.1325390915860014, "grad_norm": 1.0603987232118537, "kl": 0.36669921875, "learning_rate": 8.996448683078288e-07, "loss": 0.005018685013055801, "memory(GiB)": 112.53, "reward": 1.6875000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000074505806, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1431, "train_speed(iter/s)": 0.029645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.0, "completions/mean_length": 83.22916793823242, "completions/min_length": 50.5, "epoch": 2.1340282948622487, "grad_norm": 0.9202211768296248, "kl": 0.35888671875, "learning_rate": 8.995027032332321e-07, "loss": 0.0030357344076037407, "memory(GiB)": 112.53, "reward": 1.7291666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.729166679084301, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1432, "train_speed(iter/s)": 0.029642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.0, "completions/mean_length": 86.88541984558105, "completions/min_length": 48.5, "epoch": 2.135517498138496, "grad_norm": 0.00506795036235815, "kl": 0.35888671875, "learning_rate": 8.9936044878247e-07, "loss": 0.0003584801161196083, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1433, "train_speed(iter/s)": 0.029644 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.5, "completions/mean_length": 84.34375381469727, "completions/min_length": 49.25, "epoch": 2.137006701414743, "grad_norm": 0.00412486630680941, "kl": 0.33740234375, "learning_rate": 8.992181049873675e-07, "loss": 0.00033765932312235236, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1434, "train_speed(iter/s)": 0.029645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.25, "completions/mean_length": 81.10416793823242, "completions/min_length": 50.25, "epoch": 2.1384959046909904, "grad_norm": 0.004806141221773633, "kl": 0.357421875, "learning_rate": 8.990756718797699e-07, "loss": 0.0003577863099053502, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1435, "train_speed(iter/s)": 0.029647 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.5, "completions/mean_length": 87.55208587646484, "completions/min_length": 57.25, "epoch": 2.1399851079672376, "grad_norm": 0.7477108698326046, "kl": 0.33984375, "learning_rate": 8.989331494915416e-07, "loss": 0.008994806557893753, "memory(GiB)": 112.53, "reward": 1.6145834028720856, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6145833507180214, "rewards/CineAccuracyORM/std": 0.464870348572731, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1436, "train_speed(iter/s)": 0.029642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.25, "completions/mean_length": 81.71875381469727, "completions/min_length": 44.25, "epoch": 2.141474311243485, "grad_norm": 0.005127224284248496, "kl": 0.3310546875, "learning_rate": 8.987905378545679e-07, "loss": 0.00033096520928665996, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1437, "train_speed(iter/s)": 0.029638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.0, "completions/mean_length": 81.38541984558105, "completions/min_length": 44.75, "epoch": 2.142963514519732, "grad_norm": 0.004389976787972282, "kl": 0.3466796875, "learning_rate": 8.986478370007535e-07, "loss": 0.0003467603528406471, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1438, "train_speed(iter/s)": 0.029633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.75, "completions/mean_length": 83.42708587646484, "completions/min_length": 43.25, "epoch": 2.1444527177959793, "grad_norm": 0.004290227355316619, "kl": 0.33984375, "learning_rate": 8.985050469620234e-07, "loss": 0.0003395797102712095, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1439, "train_speed(iter/s)": 0.029635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.0, "completions/mean_length": 84.13541793823242, "completions/min_length": 43.5, "epoch": 2.145941921072226, "grad_norm": 1.0673823125325572, "kl": 0.3564453125, "learning_rate": 8.983621677703222e-07, "loss": 0.0027979451697319746, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1440, "train_speed(iter/s)": 0.029625 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.5, "completions/mean_length": 85.58333587646484, "completions/min_length": 51.0, "epoch": 2.1474311243484734, "grad_norm": 2.779450380048378, "kl": 0.33447265625, "learning_rate": 8.982191994576145e-07, "loss": -0.001824796898290515, "memory(GiB)": 112.53, "reward": 1.7083333730697632, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.7083333432674408, "rewards/CineAccuracyORM/std": 0.3712640330195427, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1441, "train_speed(iter/s)": 0.029627 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.0, "completions/mean_length": 87.45833587646484, "completions/min_length": 43.75, "epoch": 2.1489203276247206, "grad_norm": 1.2993906236894148, "kl": 0.337890625, "learning_rate": 8.980761420558853e-07, "loss": -0.006667815148830414, "memory(GiB)": 112.53, "reward": 1.729166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7291666865348816, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1442, "train_speed(iter/s)": 0.029628 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.0, "completions/mean_length": 82.22916793823242, "completions/min_length": 42.0, "epoch": 2.150409530900968, "grad_norm": 0.0041021664034303785, "kl": 0.35986328125, "learning_rate": 8.97932995597139e-07, "loss": 0.0003595263115130365, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1443, "train_speed(iter/s)": 0.029625 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.0, "completions/mean_length": 88.15625190734863, "completions/min_length": 36.5, "epoch": 2.151898734177215, "grad_norm": 0.0041045873194203025, "kl": 0.32958984375, "learning_rate": 8.977897601134002e-07, "loss": 0.0003293908084742725, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1444, "train_speed(iter/s)": 0.029621 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.0, "completions/mean_length": 81.95833778381348, "completions/min_length": 47.75, "epoch": 2.1533879374534624, "grad_norm": 0.9950736475564034, "kl": 0.3369140625, "learning_rate": 8.976464356367133e-07, "loss": 0.0009914160473272204, "memory(GiB)": 112.53, "reward": 1.7395833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 0.9895833432674408, "rewards/Format/std": 0.05103103443980217, "step": 1445, "train_speed(iter/s)": 0.029611 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 121.0, "completions/mean_length": 77.70833587646484, "completions/min_length": 45.0, "epoch": 2.1548771407297096, "grad_norm": 1.3940256212963231, "kl": 0.376953125, "learning_rate": 8.975030221991425e-07, "loss": 0.004256724379956722, "memory(GiB)": 112.53, "reward": 1.635416716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.635416679084301, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1446, "train_speed(iter/s)": 0.029612 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.5, "completions/mean_length": 82.11458587646484, "completions/min_length": 42.5, "epoch": 2.156366344005957, "grad_norm": 0.004024309178697997, "kl": 0.3466796875, "learning_rate": 8.973595198327722e-07, "loss": 0.0003465634654276073, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1447, "train_speed(iter/s)": 0.029608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.5, "completions/mean_length": 84.30208587646484, "completions/min_length": 49.0, "epoch": 2.157855547282204, "grad_norm": 1.1098208048327227, "kl": 0.33740234375, "learning_rate": 8.972159285697066e-07, "loss": 0.004033282864838839, "memory(GiB)": 112.53, "reward": 1.5520833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.5520833432674408, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1448, "train_speed(iter/s)": 0.029615 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.0, "completions/mean_length": 82.00000190734863, "completions/min_length": 46.25, "epoch": 2.1593447505584513, "grad_norm": 1.7492009191321152, "kl": 0.361328125, "learning_rate": 8.970722484420695e-07, "loss": 0.00023590463388245553, "memory(GiB)": 112.53, "reward": 1.9375, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.11058146506547928, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1449, "train_speed(iter/s)": 0.029621 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.75, "completions/mean_length": 85.72916793823242, "completions/min_length": 51.5, "epoch": 2.1608339538346986, "grad_norm": 0.8927121523086942, "kl": 0.3515625, "learning_rate": 8.969284794820052e-07, "loss": 0.003662846051156521, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1450, "train_speed(iter/s)": 0.029623 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.5, "completions/mean_length": 90.56250381469727, "completions/min_length": 56.75, "epoch": 2.162323157110946, "grad_norm": 0.8549378113854305, "kl": 0.345703125, "learning_rate": 8.96784621721677e-07, "loss": -0.009814055636525154, "memory(GiB)": 112.53, "reward": 1.8958333432674408, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.12590253353118896, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1451, "train_speed(iter/s)": 0.029625 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.0, "completions/mean_length": 87.29166984558105, "completions/min_length": 53.75, "epoch": 2.163812360387193, "grad_norm": 0.004432646657738621, "kl": 0.33837890625, "learning_rate": 8.966406751932688e-07, "loss": 0.0003384254814591259, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1452, "train_speed(iter/s)": 0.02963 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.0, "completions/mean_length": 84.46875381469727, "completions/min_length": 47.0, "epoch": 2.16530156366344, "grad_norm": 0.9538386906512365, "kl": 0.3564453125, "learning_rate": 8.964966399289841e-07, "loss": -0.0009735770290717483, "memory(GiB)": 112.53, "reward": 1.6875000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000149011612, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1453, "train_speed(iter/s)": 0.029622 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.25, "completions/mean_length": 84.01041793823242, "completions/min_length": 45.75, "epoch": 2.166790766939687, "grad_norm": 1.4264501912057055, "kl": 0.353515625, "learning_rate": 8.963525159610464e-07, "loss": 0.003286411054432392, "memory(GiB)": 112.53, "reward": 1.6562500596046448, "reward_std": 0.06803862750530243, "rewards/CineAccuracyORM/mean": 0.6562500149011612, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1454, "train_speed(iter/s)": 0.029617 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.75, "completions/mean_length": 90.50000381469727, "completions/min_length": 52.5, "epoch": 2.1682799702159343, "grad_norm": 0.004208655862289488, "kl": 0.34375, "learning_rate": 8.962083033216987e-07, "loss": 0.0003443804453127086, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1455, "train_speed(iter/s)": 0.029612 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.75, "completions/mean_length": 88.72916984558105, "completions/min_length": 52.5, "epoch": 2.1697691734921816, "grad_norm": 0.004360172323652401, "kl": 0.32421875, "learning_rate": 8.96064002043204e-07, "loss": 0.0003243529354222119, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1456, "train_speed(iter/s)": 0.029619 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 111.25, "completions/mean_length": 82.12500190734863, "completions/min_length": 54.5, "epoch": 2.171258376768429, "grad_norm": 0.005402661514047396, "kl": 0.37744140625, "learning_rate": 8.959196121578454e-07, "loss": 0.0003775020595639944, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1457, "train_speed(iter/s)": 0.029621 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.25, "completions/mean_length": 85.17708587646484, "completions/min_length": 52.5, "epoch": 2.172747580044676, "grad_norm": 0.548280345914497, "kl": 0.6923828125, "learning_rate": 8.957751336979255e-07, "loss": -0.004933847114443779, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1458, "train_speed(iter/s)": 0.029624 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.25, "completions/mean_length": 81.60416984558105, "completions/min_length": 47.0, "epoch": 2.1742367833209233, "grad_norm": 0.004128763148785429, "kl": 0.34619140625, "learning_rate": 8.956305666957666e-07, "loss": 0.00034570571733638644, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1459, "train_speed(iter/s)": 0.029626 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.5, "completions/mean_length": 92.05208587646484, "completions/min_length": 46.75, "epoch": 2.1757259865971705, "grad_norm": 0.005882348386001467, "kl": 0.333984375, "learning_rate": 8.954859111837115e-07, "loss": 0.0003330037579871714, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1460, "train_speed(iter/s)": 0.029621 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.0, "completions/mean_length": 89.16666793823242, "completions/min_length": 48.0, "epoch": 2.1772151898734178, "grad_norm": 0.7364620946203063, "kl": 0.3427734375, "learning_rate": 8.953411671941218e-07, "loss": -0.002068157307803631, "memory(GiB)": 112.53, "reward": 1.5937500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500149011612, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1461, "train_speed(iter/s)": 0.029622 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.0, "completions/mean_length": 87.01041984558105, "completions/min_length": 48.25, "epoch": 2.178704393149665, "grad_norm": 0.006275637044435536, "kl": 0.3310546875, "learning_rate": 8.951963347593796e-07, "loss": 0.0003307514707557857, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1462, "train_speed(iter/s)": 0.029629 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 120.75, "completions/mean_length": 80.03125381469727, "completions/min_length": 50.75, "epoch": 2.1801935964259123, "grad_norm": 0.004207269500579258, "kl": 0.3232421875, "learning_rate": 8.950514139118867e-07, "loss": 0.00032281765015795827, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1463, "train_speed(iter/s)": 0.029636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.25, "completions/mean_length": 83.27083396911621, "completions/min_length": 45.0, "epoch": 2.1816827997021595, "grad_norm": 0.004223180329012329, "kl": 0.34765625, "learning_rate": 8.949064046840646e-07, "loss": 0.0003475579433143139, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1464, "train_speed(iter/s)": 0.029638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.25, "completions/mean_length": 82.53125190734863, "completions/min_length": 46.25, "epoch": 2.1831720029784067, "grad_norm": 0.0039139657735330164, "kl": 0.3212890625, "learning_rate": 8.947613071083544e-07, "loss": 0.0003219287027604878, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1465, "train_speed(iter/s)": 0.029645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.25, "completions/mean_length": 88.44791793823242, "completions/min_length": 46.0, "epoch": 2.1846612062546535, "grad_norm": 0.004106530610804507, "kl": 0.318359375, "learning_rate": 8.946161212172172e-07, "loss": 0.0003180491039529443, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1466, "train_speed(iter/s)": 0.029647 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.5, "completions/mean_length": 87.09375381469727, "completions/min_length": 50.25, "epoch": 2.186150409530901, "grad_norm": 0.004194540011944271, "kl": 0.3154296875, "learning_rate": 8.944708470431339e-07, "loss": 0.0003150893608108163, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1467, "train_speed(iter/s)": 0.029654 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.25, "completions/mean_length": 87.65625190734863, "completions/min_length": 51.75, "epoch": 2.187639612807148, "grad_norm": 0.7836850630774108, "kl": 0.32666015625, "learning_rate": 8.943254846186047e-07, "loss": 0.005183390807360411, "memory(GiB)": 112.53, "reward": 1.6458334028720856, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.645833358168602, "rewards/CineAccuracyORM/std": 0.4870600998401642, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1468, "train_speed(iter/s)": 0.02965 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.0, "completions/mean_length": 87.21875381469727, "completions/min_length": 47.75, "epoch": 2.1891288160833953, "grad_norm": 1.1682917317504904, "kl": 0.31640625, "learning_rate": 8.941800339761502e-07, "loss": 0.00439694058150053, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1469, "train_speed(iter/s)": 0.029652 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.75, "completions/mean_length": 82.05208587646484, "completions/min_length": 42.0, "epoch": 2.1906180193596425, "grad_norm": 2.0340512531889408, "kl": 0.33984375, "learning_rate": 8.940344951483103e-07, "loss": 0.0031051102560013533, "memory(GiB)": 112.53, "reward": 1.6041667461395264, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6041666865348816, "rewards/CineAccuracyORM/std": 0.4870600998401642, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1470, "train_speed(iter/s)": 0.029655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 134.5, "completions/mean_length": 83.06250190734863, "completions/min_length": 40.25, "epoch": 2.1921072226358898, "grad_norm": 0.004586827530069589, "kl": 0.3212890625, "learning_rate": 8.938888681676446e-07, "loss": 0.0003210966242477298, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1471, "train_speed(iter/s)": 0.029663 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.0, "completions/mean_length": 86.11458587646484, "completions/min_length": 45.75, "epoch": 2.193596425912137, "grad_norm": 1.4860002407155102, "kl": 0.33544921875, "learning_rate": 8.937431530667327e-07, "loss": 0.007783775217831135, "memory(GiB)": 112.53, "reward": 1.6354166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6354166716337204, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1472, "train_speed(iter/s)": 0.029661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.5, "completions/mean_length": 88.78125190734863, "completions/min_length": 48.25, "epoch": 2.1950856291883842, "grad_norm": 1.2458161024742835, "kl": 0.310546875, "learning_rate": 8.935973498781739e-07, "loss": 0.012442504987120628, "memory(GiB)": 112.53, "reward": 1.6562500596046448, "reward_std": 0.0876726396381855, "rewards/CineAccuracyORM/mean": 0.6562500223517418, "rewards/CineAccuracyORM/std": 0.43965786695480347, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1473, "train_speed(iter/s)": 0.029668 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.5, "completions/mean_length": 84.88541984558105, "completions/min_length": 49.25, "epoch": 2.1965748324646315, "grad_norm": 2.0122561354696455, "kl": 0.33203125, "learning_rate": 8.934514586345867e-07, "loss": -0.00535932183265686, "memory(GiB)": 112.53, "reward": 1.6458333730697632, "reward_std": 0.11116772517561913, "rewards/CineAccuracyORM/mean": 0.6562500149011612, "rewards/CineAccuracyORM/std": 0.3735327273607254, "rewards/Format/mean": 0.9895833432674408, "rewards/Format/std": 0.05103103443980217, "step": 1474, "train_speed(iter/s)": 0.029662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 128.0, "completions/mean_length": 82.09375381469727, "completions/min_length": 51.25, "epoch": 2.1980640357408787, "grad_norm": 0.004207408186642526, "kl": 0.33447265625, "learning_rate": 8.933054793686102e-07, "loss": 0.00033395885839127004, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1475, "train_speed(iter/s)": 0.029662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.25, "completions/mean_length": 84.13541793823242, "completions/min_length": 39.75, "epoch": 2.199553239017126, "grad_norm": 0.003912397370957539, "kl": 0.357421875, "learning_rate": 8.93159412112902e-07, "loss": 0.00035772082628682256, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1476, "train_speed(iter/s)": 0.029658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.25, "completions/mean_length": 88.38541984558105, "completions/min_length": 46.75, "epoch": 2.201042442293373, "grad_norm": 0.004156808370227292, "kl": 0.3173828125, "learning_rate": 8.930132569001406e-07, "loss": 0.00031701120315119624, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1477, "train_speed(iter/s)": 0.029653 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.5, "completions/mean_length": 88.82291793823242, "completions/min_length": 48.0, "epoch": 2.2025316455696204, "grad_norm": 1.7590893898779658, "kl": 0.34375, "learning_rate": 8.928670137630234e-07, "loss": 0.0047656153328716755, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1478, "train_speed(iter/s)": 0.029643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.0, "completions/mean_length": 80.04167175292969, "completions/min_length": 35.75, "epoch": 2.2040208488458672, "grad_norm": 0.004010911533730516, "kl": 0.30859375, "learning_rate": 8.927206827342679e-07, "loss": 0.00030866722227074206, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1479, "train_speed(iter/s)": 0.029645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.0, "completions/mean_length": 84.12500190734863, "completions/min_length": 46.25, "epoch": 2.2055100521221145, "grad_norm": 0.004242614205668537, "kl": 0.32958984375, "learning_rate": 8.925742638466109e-07, "loss": 0.00032998196547850966, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1480, "train_speed(iter/s)": 0.029651 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.5, "completions/mean_length": 85.30208396911621, "completions/min_length": 41.75, "epoch": 2.2069992553983617, "grad_norm": 0.004266761439535991, "kl": 0.328125, "learning_rate": 8.92427757132809e-07, "loss": 0.0003287346917204559, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1481, "train_speed(iter/s)": 0.029653 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.75, "completions/mean_length": 78.78125190734863, "completions/min_length": 29.0, "epoch": 2.208488458674609, "grad_norm": 0.712291271515097, "kl": 0.33349609375, "learning_rate": 8.922811626256383e-07, "loss": 0.008712999522686005, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 0.9895833432674408, "rewards/Format/std": 0.05103103443980217, "step": 1482, "train_speed(iter/s)": 0.029648 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.75, "completions/mean_length": 83.87500190734863, "completions/min_length": 45.5, "epoch": 2.209977661950856, "grad_norm": 0.004124110711129866, "kl": 0.31298828125, "learning_rate": 8.921344803578952e-07, "loss": 0.00031288876198232174, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1483, "train_speed(iter/s)": 0.029644 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.0, "completions/mean_length": 84.67708778381348, "completions/min_length": 47.0, "epoch": 2.2114668652271035, "grad_norm": 0.004458653687931063, "kl": 0.3232421875, "learning_rate": 8.919877103623948e-07, "loss": 0.00032309495145455003, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1484, "train_speed(iter/s)": 0.029651 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.75, "completions/mean_length": 80.69791984558105, "completions/min_length": 48.25, "epoch": 2.2129560685033507, "grad_norm": 0.004028068986101643, "kl": 0.32666015625, "learning_rate": 8.918408526719726e-07, "loss": 0.00032692623790353537, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1485, "train_speed(iter/s)": 0.029647 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.25, "completions/mean_length": 81.81250381469727, "completions/min_length": 46.75, "epoch": 2.214445271779598, "grad_norm": 1.0791995193192676, "kl": 0.34521484375, "learning_rate": 8.916939073194831e-07, "loss": -0.0008270481484942138, "memory(GiB)": 112.53, "reward": 1.7604166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.760416679084301, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1486, "train_speed(iter/s)": 0.029648 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.25, "completions/mean_length": 87.62500190734863, "completions/min_length": 47.5, "epoch": 2.215934475055845, "grad_norm": 1.053011763602988, "kl": 0.30712890625, "learning_rate": 8.915468743378007e-07, "loss": 0.0076943617314100266, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1487, "train_speed(iter/s)": 0.02965 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.75, "completions/mean_length": 80.56250190734863, "completions/min_length": 45.5, "epoch": 2.2174236783320924, "grad_norm": 1.0571139841512354, "kl": 0.32958984375, "learning_rate": 8.913997537598199e-07, "loss": 0.004732116125524044, "memory(GiB)": 112.53, "reward": 1.8125000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1488, "train_speed(iter/s)": 0.029651 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.75, "completions/mean_length": 80.18750381469727, "completions/min_length": 39.5, "epoch": 2.2189128816083397, "grad_norm": 0.004236902861054271, "kl": 0.326171875, "learning_rate": 8.912525456184537e-07, "loss": 0.00032688688952475786, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1489, "train_speed(iter/s)": 0.029648 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.75, "completions/mean_length": 85.14583587646484, "completions/min_length": 50.5, "epoch": 2.220402084884587, "grad_norm": 0.856670146478228, "kl": 0.33837890625, "learning_rate": 8.911052499466356e-07, "loss": 0.007702048867940903, "memory(GiB)": 112.53, "reward": 1.8958333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1490, "train_speed(iter/s)": 0.029646 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.5, "completions/mean_length": 77.32292175292969, "completions/min_length": 43.75, "epoch": 2.221891288160834, "grad_norm": 0.004181663240163999, "kl": 0.3408203125, "learning_rate": 8.909578667773182e-07, "loss": 0.00034078588942065835, "memory(GiB)": 112.53, "reward": 1.5000000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1491, "train_speed(iter/s)": 0.029641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 124.5, "completions/mean_length": 80.59375190734863, "completions/min_length": 44.5, "epoch": 2.223380491437081, "grad_norm": 0.8737725046535846, "kl": 0.3369140625, "learning_rate": 8.908103961434741e-07, "loss": -0.006809431128203869, "memory(GiB)": 112.53, "reward": 1.7395834028720856, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.739583358168602, "rewards/CineAccuracyORM/std": 0.4121886007487774, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1492, "train_speed(iter/s)": 0.029625 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.25, "completions/mean_length": 82.54166984558105, "completions/min_length": 42.5, "epoch": 2.224869694713328, "grad_norm": 1.389026581007633, "kl": 0.34912109375, "learning_rate": 8.906628380780949e-07, "loss": -0.013173017650842667, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833507180214, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1493, "train_speed(iter/s)": 0.029632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.75, "completions/mean_length": 79.66666793823242, "completions/min_length": 36.25, "epoch": 2.2263588979895754, "grad_norm": 1.5999369306409723, "kl": 0.34765625, "learning_rate": 8.905151926141923e-07, "loss": 0.012764173559844494, "memory(GiB)": 112.53, "reward": 1.8750000298023224, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.8750000149011612, "rewards/CineAccuracyORM/std": 0.17827537283301353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1494, "train_speed(iter/s)": 0.029628 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.0, "completions/mean_length": 79.89583587646484, "completions/min_length": 45.25, "epoch": 2.2278481012658227, "grad_norm": 0.004244139476464889, "kl": 0.34423828125, "learning_rate": 8.903674597847973e-07, "loss": 0.00034470189712010324, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1495, "train_speed(iter/s)": 0.029626 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.5, "completions/mean_length": 82.01041793823242, "completions/min_length": 47.75, "epoch": 2.22933730454207, "grad_norm": 1.3962791297517496, "kl": 0.3330078125, "learning_rate": 8.902196396229604e-07, "loss": 0.013812541030347347, "memory(GiB)": 112.53, "reward": 1.875, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.1276884824037552, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1496, "train_speed(iter/s)": 0.029632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.0, "completions/mean_length": 84.87500381469727, "completions/min_length": 43.5, "epoch": 2.230826507818317, "grad_norm": 0.004166782198216317, "kl": 0.32080078125, "learning_rate": 8.900717321617519e-07, "loss": 0.000320372695568949, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1497, "train_speed(iter/s)": 0.029634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.75, "completions/mean_length": 81.36458587646484, "completions/min_length": 43.5, "epoch": 2.2323157110945644, "grad_norm": 2.3375603605363424, "kl": 0.322265625, "learning_rate": 8.899237374342612e-07, "loss": -0.0021308339200913906, "memory(GiB)": 112.53, "reward": 1.8541666865348816, "reward_std": 0.08311938121914864, "rewards/CineAccuracyORM/mean": 0.8541666865348816, "rewards/CineAccuracyORM/std": 0.2210759073495865, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1498, "train_speed(iter/s)": 0.029641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.5, "completions/mean_length": 85.03125381469727, "completions/min_length": 45.0, "epoch": 2.2338049143708116, "grad_norm": 0.004359869015821902, "kl": 0.33056640625, "learning_rate": 8.897756554735975e-07, "loss": 0.0003305337158963084, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1499, "train_speed(iter/s)": 0.029643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.25, "completions/mean_length": 79.04166984558105, "completions/min_length": 44.75, "epoch": 2.235294117647059, "grad_norm": 0.004235639755023354, "kl": 0.337890625, "learning_rate": 8.896274863128896e-07, "loss": 0.0003379291738383472, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1500, "train_speed(iter/s)": 0.029645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 123.25, "completions/mean_length": 81.40625190734863, "completions/min_length": 40.75, "epoch": 2.236783320923306, "grad_norm": 1.2205692685330598, "kl": 0.34619140625, "learning_rate": 8.894792299852858e-07, "loss": 0.005229791160672903, "memory(GiB)": 112.53, "reward": 1.6250000596046448, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.6250000223517418, "rewards/CineAccuracyORM/std": 0.45633094012737274, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1501, "train_speed(iter/s)": 0.029609 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.0, "completions/mean_length": 92.16666793823242, "completions/min_length": 52.5, "epoch": 2.2382725241995534, "grad_norm": 0.004091245276646752, "kl": 0.28173828125, "learning_rate": 8.893308865239535e-07, "loss": 0.00028174350154586136, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1502, "train_speed(iter/s)": 0.029615 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.75, "completions/mean_length": 81.48958587646484, "completions/min_length": 43.0, "epoch": 2.2397617274758006, "grad_norm": 0.03613265247450164, "kl": 0.34130859375, "learning_rate": 8.8918245596208e-07, "loss": 0.0003407481708563864, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1503, "train_speed(iter/s)": 0.029617 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.75, "completions/mean_length": 85.81250190734863, "completions/min_length": 47.0, "epoch": 2.241250930752048, "grad_norm": 0.004603095260629693, "kl": 0.32763671875, "learning_rate": 8.89033938332872e-07, "loss": 0.00032782345078885555, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1504, "train_speed(iter/s)": 0.029606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.5, "completions/mean_length": 87.93750381469727, "completions/min_length": 48.0, "epoch": 2.2427401340282946, "grad_norm": 1.741562562285655, "kl": 0.3701171875, "learning_rate": 8.888853336695556e-07, "loss": -0.004390565678477287, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.06803862750530243, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1505, "train_speed(iter/s)": 0.029607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.5, "completions/mean_length": 87.22916793823242, "completions/min_length": 46.5, "epoch": 2.244229337304542, "grad_norm": 0.004374393528263997, "kl": 0.31005859375, "learning_rate": 8.887366420053764e-07, "loss": 0.0003099583263974637, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1506, "train_speed(iter/s)": 0.029608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.0, "completions/mean_length": 86.94791984558105, "completions/min_length": 43.25, "epoch": 2.245718540580789, "grad_norm": 1.693367065814237, "kl": 2.03173828125, "learning_rate": 8.885878633735997e-07, "loss": 0.014432987198233604, "memory(GiB)": 112.53, "reward": 1.6458334028720856, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.645833358168602, "rewards/CineAccuracyORM/std": 0.48409245908260345, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1507, "train_speed(iter/s)": 0.029608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.5, "completions/mean_length": 87.82292175292969, "completions/min_length": 47.0, "epoch": 2.2472077438570364, "grad_norm": 0.0042289253149762, "kl": 0.326171875, "learning_rate": 8.884389978075097e-07, "loss": 0.0003257602802477777, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1508, "train_speed(iter/s)": 0.029606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.25, "completions/mean_length": 83.59375190734863, "completions/min_length": 39.75, "epoch": 2.2486969471332836, "grad_norm": 2.1679296186703785, "kl": 0.34521484375, "learning_rate": 8.882900453404107e-07, "loss": 0.0033505717292428017, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833507180214, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1509, "train_speed(iter/s)": 0.029608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.5, "completions/mean_length": 83.16666793823242, "completions/min_length": 45.0, "epoch": 2.250186150409531, "grad_norm": 0.004113508687514287, "kl": 0.326171875, "learning_rate": 8.88141006005626e-07, "loss": 0.0003266563580837101, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1510, "train_speed(iter/s)": 0.029615 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.0, "completions/mean_length": 85.12500190734863, "completions/min_length": 43.75, "epoch": 2.251675353685778, "grad_norm": 0.00416499982031261, "kl": 0.322265625, "learning_rate": 8.879918798364984e-07, "loss": 0.0003221642109565437, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1511, "train_speed(iter/s)": 0.029616 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.75, "completions/mean_length": 93.41666984558105, "completions/min_length": 56.5, "epoch": 2.2531645569620253, "grad_norm": 0.0039501895819925, "kl": 0.30029296875, "learning_rate": 8.878426668663902e-07, "loss": 0.00029975309735164046, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1512, "train_speed(iter/s)": 0.029616 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.5, "completions/mean_length": 81.86458778381348, "completions/min_length": 48.25, "epoch": 2.2546537602382726, "grad_norm": 1.5043122024931617, "kl": 0.32666015625, "learning_rate": 8.876933671286831e-07, "loss": -0.00101709074806422, "memory(GiB)": 112.53, "reward": 1.8125000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1513, "train_speed(iter/s)": 0.029614 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.75, "completions/mean_length": 86.20833587646484, "completions/min_length": 49.25, "epoch": 2.25614296351452, "grad_norm": 0.004321666308631641, "kl": 0.318359375, "learning_rate": 8.875439806567785e-07, "loss": 0.0003184950619470328, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1514, "train_speed(iter/s)": 0.029612 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.0, "completions/mean_length": 86.61458587646484, "completions/min_length": 45.75, "epoch": 2.257632166790767, "grad_norm": 0.9517755272537284, "kl": 0.31640625, "learning_rate": 8.873945074840965e-07, "loss": 0.005406049080193043, "memory(GiB)": 112.53, "reward": 1.5104166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5104166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1515, "train_speed(iter/s)": 0.029602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.25, "completions/mean_length": 85.93750190734863, "completions/min_length": 44.75, "epoch": 2.2591213700670143, "grad_norm": 0.004365549953683957, "kl": 0.30712890625, "learning_rate": 8.872449476440773e-07, "loss": 0.0003069897065870464, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1516, "train_speed(iter/s)": 0.029608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.5, "completions/mean_length": 85.70833587646484, "completions/min_length": 51.75, "epoch": 2.2606105733432615, "grad_norm": 0.004126144423080838, "kl": 0.30908203125, "learning_rate": 8.870953011701803e-07, "loss": 0.00030867347959429026, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1517, "train_speed(iter/s)": 0.029615 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.0, "completions/mean_length": 86.56250190734863, "completions/min_length": 45.75, "epoch": 2.2620997766195083, "grad_norm": 0.004203505312770756, "kl": 0.31396484375, "learning_rate": 8.869455680958838e-07, "loss": 0.0003139724722132087, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1518, "train_speed(iter/s)": 0.02961 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.75, "completions/mean_length": 80.56250190734863, "completions/min_length": 43.75, "epoch": 2.2635889798957556, "grad_norm": 0.004487635626158752, "kl": 0.3115234375, "learning_rate": 8.867957484546861e-07, "loss": 0.00031149384449236095, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1519, "train_speed(iter/s)": 0.029606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.5, "completions/mean_length": 86.86458778381348, "completions/min_length": 43.0, "epoch": 2.265078183172003, "grad_norm": 2.787535131616536, "kl": 0.298828125, "learning_rate": 8.866458422801047e-07, "loss": -0.006702038459479809, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.3307463824748993, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1520, "train_speed(iter/s)": 0.029613 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.75, "completions/mean_length": 88.77083396911621, "completions/min_length": 47.5, "epoch": 2.26656738644825, "grad_norm": 1.7589429832857564, "kl": 0.4765625, "learning_rate": 8.864958496056763e-07, "loss": -0.007704173214733601, "memory(GiB)": 112.53, "reward": 1.96875, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.96875, "rewards/CineAccuracyORM/std": 0.08445799350738525, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1521, "train_speed(iter/s)": 0.029597 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.25, "completions/mean_length": 84.14583587646484, "completions/min_length": 41.5, "epoch": 2.2680565897244973, "grad_norm": 0.004737335048795302, "kl": 0.322265625, "learning_rate": 8.86345770464957e-07, "loss": 0.0003222521336283535, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1522, "train_speed(iter/s)": 0.029604 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.75, "completions/mean_length": 83.22916984558105, "completions/min_length": 45.75, "epoch": 2.2695457930007445, "grad_norm": 0.7884505286058883, "kl": 0.306640625, "learning_rate": 8.861956048915224e-07, "loss": -0.010853450745344162, "memory(GiB)": 112.53, "reward": 1.4895833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.4895833358168602, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1523, "train_speed(iter/s)": 0.02961 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.25, "completions/mean_length": 84.60416984558105, "completions/min_length": 43.75, "epoch": 2.271034996276992, "grad_norm": 0.004891219679133465, "kl": 0.29833984375, "learning_rate": 8.860453529189673e-07, "loss": 0.00029860553331673145, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1524, "train_speed(iter/s)": 0.029612 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.75, "completions/mean_length": 84.91666984558105, "completions/min_length": 45.0, "epoch": 2.272524199553239, "grad_norm": 0.005400611225813561, "kl": 0.3173828125, "learning_rate": 8.858950145809059e-07, "loss": 0.00031736071105115116, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1525, "train_speed(iter/s)": 0.0296 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.5, "completions/mean_length": 86.33333587646484, "completions/min_length": 47.25, "epoch": 2.2740134028294863, "grad_norm": 0.004819653001702606, "kl": 0.3359375, "learning_rate": 8.857445899109715e-07, "loss": 0.00033560849260538816, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1526, "train_speed(iter/s)": 0.029602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.75, "completions/mean_length": 86.28125381469727, "completions/min_length": 48.75, "epoch": 2.2755026061057335, "grad_norm": 0.9562177242890637, "kl": 0.3125, "learning_rate": 8.855940789428169e-07, "loss": -0.013972323387861252, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833507180214, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1527, "train_speed(iter/s)": 0.029604 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.75, "completions/mean_length": 86.88541984558105, "completions/min_length": 50.5, "epoch": 2.2769918093819808, "grad_norm": 0.9796209162862062, "kl": 0.33154296875, "learning_rate": 8.854434817101144e-07, "loss": -0.010650108568370342, "memory(GiB)": 112.53, "reward": 1.8125000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1528, "train_speed(iter/s)": 0.029599 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.0, "completions/mean_length": 83.19791793823242, "completions/min_length": 51.25, "epoch": 2.278481012658228, "grad_norm": 0.0040848075024846825, "kl": 0.333984375, "learning_rate": 8.852927982465552e-07, "loss": 0.00033406211878173053, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1529, "train_speed(iter/s)": 0.0296 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.5, "completions/mean_length": 83.20833587646484, "completions/min_length": 43.25, "epoch": 2.2799702159344752, "grad_norm": 1.0853938349137455, "kl": 0.33447265625, "learning_rate": 8.851420285858502e-07, "loss": -0.0060829161666333675, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833507180214, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1530, "train_speed(iter/s)": 0.02959 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.25, "completions/mean_length": 89.22916793823242, "completions/min_length": 44.5, "epoch": 2.281459419210722, "grad_norm": 0.9358630356401834, "kl": 0.3291015625, "learning_rate": 8.849911727617292e-07, "loss": 0.001572601730003953, "memory(GiB)": 112.53, "reward": 1.635416716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.635416679084301, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1531, "train_speed(iter/s)": 0.029591 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.0, "completions/mean_length": 88.20833396911621, "completions/min_length": 48.75, "epoch": 2.2829486224869693, "grad_norm": 0.0040297181977926875, "kl": 0.31982421875, "learning_rate": 8.848402308079414e-07, "loss": 0.00031966122332960367, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1532, "train_speed(iter/s)": 0.029592 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.75, "completions/mean_length": 85.12500190734863, "completions/min_length": 39.5, "epoch": 2.2844378257632165, "grad_norm": 0.004659931888044724, "kl": 0.30859375, "learning_rate": 8.846892027582554e-07, "loss": 0.0003087228396907449, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1533, "train_speed(iter/s)": 0.029598 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.75, "completions/mean_length": 89.13541984558105, "completions/min_length": 45.75, "epoch": 2.2859270290394638, "grad_norm": 0.0042297039534658016, "kl": 0.32861328125, "learning_rate": 8.845380886464591e-07, "loss": 0.0003284516278654337, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1534, "train_speed(iter/s)": 0.0296 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.75, "completions/mean_length": 86.06250190734863, "completions/min_length": 44.5, "epoch": 2.287416232315711, "grad_norm": 0.00374471209428985, "kl": 0.3251953125, "learning_rate": 8.843868885063592e-07, "loss": 0.00032553993514738977, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1535, "train_speed(iter/s)": 0.029601 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.75, "completions/mean_length": 87.42708778381348, "completions/min_length": 48.5, "epoch": 2.2889054355919582, "grad_norm": 0.004593997631745019, "kl": 0.31884765625, "learning_rate": 8.842356023717823e-07, "loss": 0.0003190205607097596, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1536, "train_speed(iter/s)": 0.029602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.5, "completions/mean_length": 89.41666984558105, "completions/min_length": 54.0, "epoch": 2.2903946388682055, "grad_norm": 0.004752600299052942, "kl": 0.314453125, "learning_rate": 8.840842302765738e-07, "loss": 0.0003143146459478885, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1537, "train_speed(iter/s)": 0.029604 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.5, "completions/mean_length": 87.12500381469727, "completions/min_length": 39.75, "epoch": 2.2918838421444527, "grad_norm": 0.003978272047946928, "kl": 0.32421875, "learning_rate": 8.839327722545984e-07, "loss": 0.00032423780066892505, "memory(GiB)": 112.53, "reward": 1.5000000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1538, "train_speed(iter/s)": 0.029605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.25, "completions/mean_length": 86.18750190734863, "completions/min_length": 40.75, "epoch": 2.2933730454207, "grad_norm": 0.006379693282369447, "kl": 0.35400390625, "learning_rate": 8.837812283397401e-07, "loss": 0.0003529661044012755, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1539, "train_speed(iter/s)": 0.029596 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.25, "completions/mean_length": 91.41666793823242, "completions/min_length": 49.5, "epoch": 2.294862248696947, "grad_norm": 0.9435964260784572, "kl": 0.3037109375, "learning_rate": 8.836295985659022e-07, "loss": -0.004418095573782921, "memory(GiB)": 112.53, "reward": 1.8854166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.20484384894371033, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1540, "train_speed(iter/s)": 0.029598 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.25, "completions/mean_length": 90.17708587646484, "completions/min_length": 49.0, "epoch": 2.2963514519731945, "grad_norm": 0.0056416601789242085, "kl": 0.3251953125, "learning_rate": 8.834778829670069e-07, "loss": 0.0003247943241149187, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1541, "train_speed(iter/s)": 0.029594 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.0, "completions/mean_length": 87.98958587646484, "completions/min_length": 44.25, "epoch": 2.2978406552494417, "grad_norm": 0.005017900379922321, "kl": 0.32763671875, "learning_rate": 8.833260815769958e-07, "loss": 0.000327474030200392, "memory(GiB)": 112.53, "reward": 1.5000000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1542, "train_speed(iter/s)": 0.029595 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.75, "completions/mean_length": 90.39583587646484, "completions/min_length": 43.0, "epoch": 2.299329858525689, "grad_norm": 0.004147643588303597, "kl": 0.3193359375, "learning_rate": 8.831741944298299e-07, "loss": 0.0003190705901943147, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1543, "train_speed(iter/s)": 0.02959 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.75, "completions/mean_length": 87.03125190734863, "completions/min_length": 47.0, "epoch": 2.3008190618019357, "grad_norm": 2.720119590292928, "kl": 0.33740234375, "learning_rate": 8.83022221559489e-07, "loss": -0.011399097740650177, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1544, "train_speed(iter/s)": 0.029591 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.5, "completions/mean_length": 90.70833778381348, "completions/min_length": 50.0, "epoch": 2.302308265078183, "grad_norm": 0.9645508037652107, "kl": 0.333984375, "learning_rate": 8.828701629999722e-07, "loss": 0.017221566289663315, "memory(GiB)": 112.53, "reward": 1.5520834028720856, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.5520833507180214, "rewards/CineAccuracyORM/std": 0.4884019047021866, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1545, "train_speed(iter/s)": 0.029586 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.75, "completions/mean_length": 85.87500190734863, "completions/min_length": 47.25, "epoch": 2.3037974683544302, "grad_norm": 0.004811611941272983, "kl": 0.34130859375, "learning_rate": 8.827180187852981e-07, "loss": 0.0003404735471121967, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1546, "train_speed(iter/s)": 0.029589 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.75, "completions/mean_length": 89.69791984558105, "completions/min_length": 49.5, "epoch": 2.3052866716306775, "grad_norm": 1.7394029813447744, "kl": 0.326171875, "learning_rate": 8.825657889495037e-07, "loss": -0.004391385242342949, "memory(GiB)": 112.53, "reward": 1.6041666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6041666716337204, "rewards/CineAccuracyORM/std": 0.12590253353118896, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1547, "train_speed(iter/s)": 0.029588 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.5, "completions/mean_length": 91.59375190734863, "completions/min_length": 56.75, "epoch": 2.3067758749069247, "grad_norm": 0.004201280498222505, "kl": 0.3046875, "learning_rate": 8.824134735266462e-07, "loss": 0.0003043440810870379, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1548, "train_speed(iter/s)": 0.029586 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.75, "completions/mean_length": 87.82291793823242, "completions/min_length": 50.25, "epoch": 2.308265078183172, "grad_norm": 0.005713838954734592, "kl": 0.32958984375, "learning_rate": 8.822610725508011e-07, "loss": 0.0003293965710327029, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1549, "train_speed(iter/s)": 0.029588 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.5, "completions/mean_length": 89.17708587646484, "completions/min_length": 52.0, "epoch": 2.309754281459419, "grad_norm": 0.006632014204298506, "kl": 0.3193359375, "learning_rate": 8.821085860560632e-07, "loss": 0.0003194283926859498, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1550, "train_speed(iter/s)": 0.029588 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.25, "completions/mean_length": 85.51041984558105, "completions/min_length": 51.0, "epoch": 2.3112434847356664, "grad_norm": 1.233938155295133, "kl": 0.32666015625, "learning_rate": 8.819560140765468e-07, "loss": 0.01351000927388668, "memory(GiB)": 112.53, "reward": 1.8750000298023224, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.8750000149011612, "rewards/CineAccuracyORM/std": 0.2155592292547226, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1551, "train_speed(iter/s)": 0.029591 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.75, "completions/mean_length": 94.09375190734863, "completions/min_length": 51.0, "epoch": 2.3127326880119137, "grad_norm": 0.004204150335194349, "kl": 0.27294921875, "learning_rate": 8.81803356646385e-07, "loss": 0.00027298351051285863, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1552, "train_speed(iter/s)": 0.029597 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.5, "completions/mean_length": 82.86458587646484, "completions/min_length": 45.25, "epoch": 2.314221891288161, "grad_norm": 1.8477635471254608, "kl": 0.33251953125, "learning_rate": 8.816506137997299e-07, "loss": 0.005683492869138718, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1553, "train_speed(iter/s)": 0.029598 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.0, "completions/mean_length": 88.78125381469727, "completions/min_length": 46.25, "epoch": 2.315711094564408, "grad_norm": 0.00427665398555975, "kl": 0.3232421875, "learning_rate": 8.814977855707533e-07, "loss": 0.00032317452132701874, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1554, "train_speed(iter/s)": 0.02959 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.0, "completions/mean_length": 93.85416984558105, "completions/min_length": 57.75, "epoch": 2.3172002978406554, "grad_norm": 0.004272032593252262, "kl": 0.30712890625, "learning_rate": 8.813448719936454e-07, "loss": 0.0003071255050599575, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1555, "train_speed(iter/s)": 0.029596 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.5, "completions/mean_length": 82.64583587646484, "completions/min_length": 42.5, "epoch": 2.3186895011169026, "grad_norm": 1.018185260381018, "kl": 0.318359375, "learning_rate": 8.811918731026158e-07, "loss": 0.0024456526152789593, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1556, "train_speed(iter/s)": 0.029598 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.0, "completions/mean_length": 86.63541793823242, "completions/min_length": 44.25, "epoch": 2.3201787043931494, "grad_norm": 0.004334879110379179, "kl": 0.32958984375, "learning_rate": 8.810387889318933e-07, "loss": 0.00032942130928859115, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1557, "train_speed(iter/s)": 0.029604 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.0, "completions/mean_length": 85.56250381469727, "completions/min_length": 45.5, "epoch": 2.3216679076693967, "grad_norm": 1.1223242276483019, "kl": 0.33984375, "learning_rate": 8.808856195157259e-07, "loss": -0.013608403503894806, "memory(GiB)": 112.53, "reward": 1.8541666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.12590253353118896, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1558, "train_speed(iter/s)": 0.029606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.75, "completions/mean_length": 86.53125381469727, "completions/min_length": 38.25, "epoch": 2.323157110945644, "grad_norm": 0.03846412450480176, "kl": 0.34130859375, "learning_rate": 8.8073236488838e-07, "loss": 0.00034128554398193955, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1559, "train_speed(iter/s)": 0.029596 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.25, "completions/mean_length": 84.44791984558105, "completions/min_length": 47.75, "epoch": 2.324646314221891, "grad_norm": 0.004397327417819854, "kl": 0.3076171875, "learning_rate": 8.805790250841417e-07, "loss": 0.0003079719317611307, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1560, "train_speed(iter/s)": 0.029596 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.25, "completions/mean_length": 89.37500381469727, "completions/min_length": 49.5, "epoch": 2.3261355174981384, "grad_norm": 1.508982096711342, "kl": 0.3720703125, "learning_rate": 8.804256001373161e-07, "loss": -0.0030036168172955513, "memory(GiB)": 112.53, "reward": 1.6562500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500074505806, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1561, "train_speed(iter/s)": 0.029591 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.25, "completions/mean_length": 88.65625, "completions/min_length": 50.25, "epoch": 2.3276247207743856, "grad_norm": 0.004420736525572771, "kl": 0.3388671875, "learning_rate": 8.802720900822269e-07, "loss": 0.00033919617999345064, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1562, "train_speed(iter/s)": 0.029587 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.0, "completions/mean_length": 91.31250190734863, "completions/min_length": 45.75, "epoch": 2.329113924050633, "grad_norm": 0.004160790323736489, "kl": 0.31201171875, "learning_rate": 8.801184949532174e-07, "loss": 0.00031204562401399016, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1563, "train_speed(iter/s)": 0.029588 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.75, "completions/mean_length": 94.26042175292969, "completions/min_length": 51.75, "epoch": 2.33060312732688, "grad_norm": 0.004100942817568803, "kl": 0.30517578125, "learning_rate": 8.799648147846496e-07, "loss": 0.00030481687281280756, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1564, "train_speed(iter/s)": 0.029594 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.0, "completions/mean_length": 85.58333587646484, "completions/min_length": 51.5, "epoch": 2.3320923306031274, "grad_norm": 1.512430274402743, "kl": 0.31982421875, "learning_rate": 8.798110496109046e-07, "loss": -0.007741433568298817, "memory(GiB)": 112.53, "reward": 1.6458334028720856, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6458333507180214, "rewards/CineAccuracyORM/std": 0.43174003064632416, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1565, "train_speed(iter/s)": 0.029595 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.25, "completions/mean_length": 85.75000190734863, "completions/min_length": 45.0, "epoch": 2.3335815338793746, "grad_norm": 0.0045582593856089, "kl": 0.33642578125, "learning_rate": 8.796571994663824e-07, "loss": 0.00033648122916929424, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1566, "train_speed(iter/s)": 0.029585 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 126.0, "completions/mean_length": 80.70833587646484, "completions/min_length": 46.75, "epoch": 2.335070737155622, "grad_norm": 0.004358769362624213, "kl": 0.3125, "learning_rate": 8.795032643855024e-07, "loss": 0.0003130432160105556, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1567, "train_speed(iter/s)": 0.029592 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.0, "completions/mean_length": 92.54166984558105, "completions/min_length": 50.25, "epoch": 2.336559940431869, "grad_norm": 0.004092948336944274, "kl": 0.3037109375, "learning_rate": 8.793492444027026e-07, "loss": 0.0003035983245354146, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1568, "train_speed(iter/s)": 0.029588 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.25, "completions/mean_length": 85.65625190734863, "completions/min_length": 47.0, "epoch": 2.3380491437081163, "grad_norm": 0.004632197961254373, "kl": 0.33154296875, "learning_rate": 8.7919513955244e-07, "loss": 0.0003312409098725766, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1569, "train_speed(iter/s)": 0.029594 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.25, "completions/mean_length": 92.07291984558105, "completions/min_length": 43.5, "epoch": 2.339538346984363, "grad_norm": 1.0048683554632216, "kl": 0.32080078125, "learning_rate": 8.790409498691909e-07, "loss": 0.018076375126838684, "memory(GiB)": 112.53, "reward": 1.5104166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5104166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1570, "train_speed(iter/s)": 0.029596 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.75, "completions/mean_length": 87.33333396911621, "completions/min_length": 42.75, "epoch": 2.3410275502606104, "grad_norm": 2.0309840405501385, "kl": 0.32666015625, "learning_rate": 8.788866753874502e-07, "loss": 0.004281629808247089, "memory(GiB)": 112.53, "reward": 1.7083333730697632, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7083333507180214, "rewards/CineAccuracyORM/std": 0.33594508469104767, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1571, "train_speed(iter/s)": 0.029587 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.0, "completions/mean_length": 89.40625190734863, "completions/min_length": 56.0, "epoch": 2.3425167535368576, "grad_norm": 0.004021778286238018, "kl": 0.33203125, "learning_rate": 8.787323161417322e-07, "loss": 0.00033222930505871773, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1572, "train_speed(iter/s)": 0.029583 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.25, "completions/mean_length": 85.87500381469727, "completions/min_length": 42.0, "epoch": 2.344005956813105, "grad_norm": 0.004202408688117367, "kl": 0.337890625, "learning_rate": 8.785778721665698e-07, "loss": 0.00033847917802631855, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1573, "train_speed(iter/s)": 0.029578 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.5, "completions/mean_length": 87.04166984558105, "completions/min_length": 40.25, "epoch": 2.345495160089352, "grad_norm": 0.003908188851170269, "kl": 0.32958984375, "learning_rate": 8.784233434965148e-07, "loss": 0.00033022285788320005, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1574, "train_speed(iter/s)": 0.029572 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.25, "completions/mean_length": 84.71875, "completions/min_length": 44.0, "epoch": 2.3469843633655993, "grad_norm": 1.4634272262755517, "kl": 0.3271484375, "learning_rate": 8.782687301661384e-07, "loss": -0.008288415148854256, "memory(GiB)": 112.53, "reward": 1.572916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5729166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1575, "train_speed(iter/s)": 0.029573 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.75, "completions/mean_length": 84.88541984558105, "completions/min_length": 42.0, "epoch": 2.3484735666418466, "grad_norm": 1.103453179492889, "kl": 0.3251953125, "learning_rate": 8.781140322100305e-07, "loss": 0.0018669571727514267, "memory(GiB)": 112.53, "reward": 1.8541666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1576, "train_speed(iter/s)": 0.029574 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.25, "completions/mean_length": 85.70833396911621, "completions/min_length": 41.25, "epoch": 2.349962769918094, "grad_norm": 0.688591501646129, "kl": 0.31005859375, "learning_rate": 8.779592496627998e-07, "loss": -0.0044057779014110565, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1577, "train_speed(iter/s)": 0.029577 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.25, "completions/mean_length": 89.10416984558105, "completions/min_length": 50.75, "epoch": 2.351451973194341, "grad_norm": 3.5571663743142006, "kl": 0.357421875, "learning_rate": 8.778043825590739e-07, "loss": 0.007577329874038696, "memory(GiB)": 112.53, "reward": 1.822916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1578, "train_speed(iter/s)": 0.029567 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.0, "completions/mean_length": 91.14583587646484, "completions/min_length": 46.5, "epoch": 2.3529411764705883, "grad_norm": 1.4044001538292483, "kl": 0.30517578125, "learning_rate": 8.776494309334997e-07, "loss": 0.010218925774097443, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833507180214, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1579, "train_speed(iter/s)": 0.029565 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.25, "completions/mean_length": 91.48958587646484, "completions/min_length": 45.5, "epoch": 2.3544303797468356, "grad_norm": 0.0037948459292234094, "kl": 0.31103515625, "learning_rate": 8.774943948207425e-07, "loss": 0.00031161156948655844, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1580, "train_speed(iter/s)": 0.029571 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 179.0, "completions/mean_length": 89.18750381469727, "completions/min_length": 45.5, "epoch": 2.355919583023083, "grad_norm": 0.004154369765916394, "kl": 0.32080078125, "learning_rate": 8.773392742554868e-07, "loss": 0.0003213330346625298, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1581, "train_speed(iter/s)": 0.029571 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.5, "completions/mean_length": 91.51041984558105, "completions/min_length": 51.75, "epoch": 2.35740878629933, "grad_norm": 0.004297394753450446, "kl": 0.32373046875, "learning_rate": 8.771840692724361e-07, "loss": 0.00032380849006585777, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1582, "train_speed(iter/s)": 0.029566 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.5, "completions/mean_length": 87.59375381469727, "completions/min_length": 41.5, "epoch": 2.358897989575577, "grad_norm": 2.8592921977689314, "kl": 3.06787109375, "learning_rate": 8.770287799063127e-07, "loss": 0.005797242280095816, "memory(GiB)": 112.53, "reward": 1.541666716337204, "reward_std": 0.0725918859243393, "rewards/CineAccuracyORM/mean": 0.541666679084301, "rewards/CineAccuracyORM/std": 0.4888460487127304, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1583, "train_speed(iter/s)": 0.029564 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.5, "completions/mean_length": 86.51042175292969, "completions/min_length": 41.25, "epoch": 2.3603871928518245, "grad_norm": 0.0037867841603385836, "kl": 0.30517578125, "learning_rate": 8.768734061918574e-07, "loss": 0.0003044687327928841, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1584, "train_speed(iter/s)": 0.029566 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 131.25, "completions/mean_length": 82.53125381469727, "completions/min_length": 38.25, "epoch": 2.3618763961280713, "grad_norm": 0.0043621621494050735, "kl": 0.3505859375, "learning_rate": 8.767179481638303e-07, "loss": 0.0003506880020722747, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1585, "train_speed(iter/s)": 0.029573 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.25, "completions/mean_length": 85.00000190734863, "completions/min_length": 38.5, "epoch": 2.3633655994043186, "grad_norm": 0.0037200003246233796, "kl": 0.33251953125, "learning_rate": 8.765624058570105e-07, "loss": 0.00033236455055885017, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1586, "train_speed(iter/s)": 0.029574 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.5, "completions/mean_length": 88.02083587646484, "completions/min_length": 48.25, "epoch": 2.364854802680566, "grad_norm": 1.0526448127683312, "kl": 0.31103515625, "learning_rate": 8.764067793061953e-07, "loss": 0.004873606376349926, "memory(GiB)": 112.53, "reward": 1.7187500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500149011612, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1587, "train_speed(iter/s)": 0.029575 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.0, "completions/mean_length": 89.72916984558105, "completions/min_length": 46.75, "epoch": 2.366344005956813, "grad_norm": 0.004310499846212197, "kl": 0.3212890625, "learning_rate": 8.762510685462015e-07, "loss": 0.00032146097510121763, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1588, "train_speed(iter/s)": 0.02957 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.0, "completions/mean_length": 89.53125381469727, "completions/min_length": 44.25, "epoch": 2.3678332092330603, "grad_norm": 0.004120874571452266, "kl": 0.31689453125, "learning_rate": 8.760952736118643e-07, "loss": 0.0003166199312545359, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1589, "train_speed(iter/s)": 0.029571 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.5, "completions/mean_length": 79.38541793823242, "completions/min_length": 37.75, "epoch": 2.3693224125093075, "grad_norm": 0.004178779089258788, "kl": 0.35986328125, "learning_rate": 8.759393945380381e-07, "loss": 0.0003591629210859537, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1590, "train_speed(iter/s)": 0.029577 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.0, "completions/mean_length": 90.26041793823242, "completions/min_length": 42.5, "epoch": 2.3708116157855548, "grad_norm": 0.6810521202989299, "kl": 0.31396484375, "learning_rate": 8.75783431359596e-07, "loss": -0.009872698225080967, "memory(GiB)": 112.53, "reward": 1.6562500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500074505806, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1591, "train_speed(iter/s)": 0.029579 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 129.75, "completions/mean_length": 85.21875190734863, "completions/min_length": 48.0, "epoch": 2.372300819061802, "grad_norm": 0.004025627436987142, "kl": 0.34423828125, "learning_rate": 8.756273841114296e-07, "loss": 0.0003440398722887039, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1592, "train_speed(iter/s)": 0.029576 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.0, "completions/mean_length": 85.88541793823242, "completions/min_length": 47.0, "epoch": 2.3737900223380493, "grad_norm": 0.004545229822651992, "kl": 0.33154296875, "learning_rate": 8.754712528284497e-07, "loss": 0.00033161742612719536, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1593, "train_speed(iter/s)": 0.029573 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 133.75, "completions/mean_length": 90.72916793823242, "completions/min_length": 47.25, "epoch": 2.3752792256142965, "grad_norm": 1.508886445177863, "kl": 0.36669921875, "learning_rate": 8.753150375455858e-07, "loss": -0.006357462145388126, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.0876726396381855, "rewards/CineAccuracyORM/mean": 0.6770833432674408, "rewards/CineAccuracyORM/std": 0.3753186762332916, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1594, "train_speed(iter/s)": 0.029569 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.75, "completions/mean_length": 86.38541793823242, "completions/min_length": 38.0, "epoch": 2.3767684288905437, "grad_norm": 1.929118203609297, "kl": 0.3349609375, "learning_rate": 8.751587382977861e-07, "loss": -0.0054902187548577785, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.06803862750530243, "rewards/CineAccuracyORM/mean": 0.6770833507180214, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1595, "train_speed(iter/s)": 0.02957 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.5, "completions/mean_length": 84.92708587646484, "completions/min_length": 35.5, "epoch": 2.3782576321667905, "grad_norm": 1.5971572785584345, "kl": 0.3466796875, "learning_rate": 8.750023551200176e-07, "loss": -0.0011372349690645933, "memory(GiB)": 112.53, "reward": 1.4479166865348816, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.4479166716337204, "rewards/CineAccuracyORM/std": 0.24953637272119522, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1596, "train_speed(iter/s)": 0.029566 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.5, "completions/mean_length": 90.23958396911621, "completions/min_length": 43.0, "epoch": 2.379746835443038, "grad_norm": 2.086616625230922, "kl": 0.32275390625, "learning_rate": 8.748458880472662e-07, "loss": 0.0008427593857049942, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1597, "train_speed(iter/s)": 0.029561 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.0, "completions/mean_length": 89.55208587646484, "completions/min_length": 41.0, "epoch": 2.381236038719285, "grad_norm": 0.0037109536144799447, "kl": 0.32763671875, "learning_rate": 8.746893371145365e-07, "loss": 0.00032769545214250684, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1598, "train_speed(iter/s)": 0.029557 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.5, "completions/mean_length": 91.78125190734863, "completions/min_length": 52.75, "epoch": 2.3827252419955323, "grad_norm": 0.00402045150182291, "kl": 0.341796875, "learning_rate": 8.745327023568518e-07, "loss": 0.00034193374449387193, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1599, "train_speed(iter/s)": 0.029557 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.5, "completions/mean_length": 85.35416793823242, "completions/min_length": 45.75, "epoch": 2.3842144452717795, "grad_norm": 0.003789235027578061, "kl": 0.34765625, "learning_rate": 8.743759838092544e-07, "loss": 0.0003486938076093793, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1600, "train_speed(iter/s)": 0.029559 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.5, "completions/mean_length": 92.02083587646484, "completions/min_length": 48.75, "epoch": 2.3857036485480267, "grad_norm": 0.9440441637887852, "kl": 0.32421875, "learning_rate": 8.742191815068047e-07, "loss": -0.005281728692352772, "memory(GiB)": 112.53, "reward": 1.760416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166865348816, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1601, "train_speed(iter/s)": 0.029565 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.0, "completions/mean_length": 87.75000190734863, "completions/min_length": 47.0, "epoch": 2.387192851824274, "grad_norm": 0.003922812174002598, "kl": 0.35693359375, "learning_rate": 8.740622954845825e-07, "loss": 0.0003572672139853239, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1602, "train_speed(iter/s)": 0.029551 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.75, "completions/mean_length": 90.27083587646484, "completions/min_length": 46.25, "epoch": 2.3886820551005212, "grad_norm": 1.5154443751146414, "kl": 0.33251953125, "learning_rate": 8.739053257776863e-07, "loss": -0.0032263933680951595, "memory(GiB)": 112.53, "reward": 1.6979166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.1037127822637558, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1603, "train_speed(iter/s)": 0.029557 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 137.25, "completions/mean_length": 88.59375190734863, "completions/min_length": 49.5, "epoch": 2.3901712583767685, "grad_norm": 0.7190556867194795, "kl": 0.33544921875, "learning_rate": 8.737482724212331e-07, "loss": -0.0017193618696182966, "memory(GiB)": 112.53, "reward": 1.7395834028720856, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.739583358168602, "rewards/CineAccuracyORM/std": 0.4121886007487774, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1604, "train_speed(iter/s)": 0.029559 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.25, "completions/mean_length": 88.93750190734863, "completions/min_length": 47.25, "epoch": 2.3916604616530157, "grad_norm": 1.2941834703213915, "kl": 0.36572265625, "learning_rate": 8.735911354503582e-07, "loss": -0.01179949939250946, "memory(GiB)": 112.53, "reward": 1.8541666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1605, "train_speed(iter/s)": 0.029561 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.75, "completions/mean_length": 88.05208778381348, "completions/min_length": 45.5, "epoch": 2.393149664929263, "grad_norm": 1.6712684708256504, "kl": 0.34423828125, "learning_rate": 8.734339149002166e-07, "loss": -0.0077113802544772625, "memory(GiB)": 112.53, "reward": 1.8541666865348816, "reward_std": 0.08311937749385834, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1606, "train_speed(iter/s)": 0.029566 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.75, "completions/mean_length": 95.37500190734863, "completions/min_length": 52.5, "epoch": 2.39463886820551, "grad_norm": 0.7513421703266151, "kl": 0.31396484375, "learning_rate": 8.732766108059812e-07, "loss": 0.0016143833054229617, "memory(GiB)": 112.53, "reward": 1.760416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166865348816, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1607, "train_speed(iter/s)": 0.029567 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.75, "completions/mean_length": 89.05208396911621, "completions/min_length": 50.25, "epoch": 2.3961280714817574, "grad_norm": 1.2892486471983347, "kl": 0.33251953125, "learning_rate": 8.73119223202844e-07, "loss": -0.0007259302074089646, "memory(GiB)": 112.53, "reward": 1.7812500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7812500149011612, "rewards/CineAccuracyORM/std": 0.34448449313640594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1608, "train_speed(iter/s)": 0.029564 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 185.0, "completions/mean_length": 96.58333587646484, "completions/min_length": 54.0, "epoch": 2.3976172747580042, "grad_norm": 1.263954561681081, "kl": 0.33544921875, "learning_rate": 8.729617521260153e-07, "loss": 0.004802495241165161, "memory(GiB)": 112.53, "reward": 1.4687500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.4687500074505806, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1609, "train_speed(iter/s)": 0.02956 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.25, "completions/mean_length": 89.18750190734863, "completions/min_length": 53.75, "epoch": 2.399106478034252, "grad_norm": 0.9229283328881017, "kl": 0.353515625, "learning_rate": 8.728041976107245e-07, "loss": 0.00395477470010519, "memory(GiB)": 112.53, "reward": 1.729166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.729166679084301, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1610, "train_speed(iter/s)": 0.029556 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.75, "completions/mean_length": 101.98958778381348, "completions/min_length": 62.75, "epoch": 2.4005956813104987, "grad_norm": 1.0932287229298403, "kl": 0.32666015625, "learning_rate": 8.726465596922194e-07, "loss": 0.025737211108207703, "memory(GiB)": 112.53, "reward": 1.6875000596046448, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.6875000149011612, "rewards/CineAccuracyORM/std": 0.4681183323264122, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1611, "train_speed(iter/s)": 0.029548 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.0, "completions/mean_length": 98.55208778381348, "completions/min_length": 61.5, "epoch": 2.402084884586746, "grad_norm": 0.004203823500497726, "kl": 0.3056640625, "learning_rate": 8.724888384057663e-07, "loss": 0.0003058544534724206, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1612, "train_speed(iter/s)": 0.029554 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.75, "completions/mean_length": 93.06250190734863, "completions/min_length": 49.0, "epoch": 2.403574087862993, "grad_norm": 0.005586318852651672, "kl": 0.35009765625, "learning_rate": 8.723310337866507e-07, "loss": 0.00035098203807137907, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1613, "train_speed(iter/s)": 0.029555 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.75, "completions/mean_length": 98.21875190734863, "completions/min_length": 61.0, "epoch": 2.4050632911392404, "grad_norm": 0.004016299767482953, "kl": 0.33642578125, "learning_rate": 8.721731458701764e-07, "loss": 0.0003362236893735826, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1614, "train_speed(iter/s)": 0.029552 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.0, "completions/mean_length": 103.88541793823242, "completions/min_length": 58.75, "epoch": 2.4065524944154877, "grad_norm": 0.7313397253257746, "kl": 0.32958984375, "learning_rate": 8.720151746916658e-07, "loss": 0.020082540810108185, "memory(GiB)": 112.53, "reward": 1.7187500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500149011612, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1615, "train_speed(iter/s)": 0.029541 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.5, "completions/mean_length": 89.84375381469727, "completions/min_length": 51.75, "epoch": 2.408041697691735, "grad_norm": 1.2917331690282148, "kl": 0.3447265625, "learning_rate": 8.718571202864597e-07, "loss": 0.004484078846871853, "memory(GiB)": 112.53, "reward": 1.6458333730697632, "reward_std": 0.09750140458345413, "rewards/CineAccuracyORM/mean": 0.645833358168602, "rewards/CineAccuracyORM/std": 0.42095326259732246, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1616, "train_speed(iter/s)": 0.029544 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.0, "completions/mean_length": 92.92708778381348, "completions/min_length": 50.0, "epoch": 2.409530900967982, "grad_norm": 0.8289982626287861, "kl": 0.34423828125, "learning_rate": 8.716989826899183e-07, "loss": 0.005382562056183815, "memory(GiB)": 112.53, "reward": 1.4270833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.4270833432674408, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1617, "train_speed(iter/s)": 0.02955 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.75, "completions/mean_length": 92.44791793823242, "completions/min_length": 55.0, "epoch": 2.4110201042442294, "grad_norm": 1.450055114446775, "kl": 0.34765625, "learning_rate": 8.715407619374195e-07, "loss": -0.004860882181674242, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1618, "train_speed(iter/s)": 0.029551 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.75, "completions/mean_length": 93.15625190734863, "completions/min_length": 58.5, "epoch": 2.4125093075204767, "grad_norm": 0.0045982170663459564, "kl": 0.34716796875, "learning_rate": 8.713824580643605e-07, "loss": 0.000347638240782544, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1619, "train_speed(iter/s)": 0.029557 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.5, "completions/mean_length": 93.10416984558105, "completions/min_length": 52.5, "epoch": 2.413998510796724, "grad_norm": 0.9109981592681925, "kl": 0.34765625, "learning_rate": 8.712240711061566e-07, "loss": -0.0025154794566333294, "memory(GiB)": 112.53, "reward": 1.760416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166865348816, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1620, "train_speed(iter/s)": 0.029547 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.25, "completions/mean_length": 92.93750190734863, "completions/min_length": 51.0, "epoch": 2.415487714072971, "grad_norm": 0.004151890259428019, "kl": 0.3515625, "learning_rate": 8.71065601098242e-07, "loss": 0.00035094525082968175, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1621, "train_speed(iter/s)": 0.029543 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.5, "completions/mean_length": 91.54166984558105, "completions/min_length": 51.0, "epoch": 2.416976917349218, "grad_norm": 0.004500937386099419, "kl": 0.33740234375, "learning_rate": 8.709070480760694e-07, "loss": 0.00033699505729600787, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1622, "train_speed(iter/s)": 0.029549 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.0, "completions/mean_length": 89.89583396911621, "completions/min_length": 45.75, "epoch": 2.4184661206254656, "grad_norm": 0.004266881514367388, "kl": 0.3369140625, "learning_rate": 8.707484120751101e-07, "loss": 0.0003369541373103857, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1623, "train_speed(iter/s)": 0.029555 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.75, "completions/mean_length": 90.53125190734863, "completions/min_length": 50.25, "epoch": 2.4199553239017124, "grad_norm": 0.9761500743167483, "kl": 0.35693359375, "learning_rate": 8.705896931308537e-07, "loss": 0.0023736522998660803, "memory(GiB)": 112.53, "reward": 1.760416716337204, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.7604166865348816, "rewards/CineAccuracyORM/std": 0.2973194234073162, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1624, "train_speed(iter/s)": 0.029561 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.0, "completions/mean_length": 91.34375381469727, "completions/min_length": 52.5, "epoch": 2.4214445271779597, "grad_norm": 0.004532243272076316, "kl": 0.35986328125, "learning_rate": 8.704308912788088e-07, "loss": 0.00035970041062682867, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1625, "train_speed(iter/s)": 0.029561 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.25, "completions/mean_length": 92.11458778381348, "completions/min_length": 50.25, "epoch": 2.422933730454207, "grad_norm": 0.7765120689239775, "kl": 0.322265625, "learning_rate": 8.702720065545023e-07, "loss": 0.003813492599874735, "memory(GiB)": 112.53, "reward": 1.635416716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.635416679084301, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1626, "train_speed(iter/s)": 0.029561 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.0, "completions/mean_length": 95.83333587646484, "completions/min_length": 52.0, "epoch": 2.424422933730454, "grad_norm": 1.056965756579626, "kl": 0.3359375, "learning_rate": 8.701130389934794e-07, "loss": 0.004881467670202255, "memory(GiB)": 112.53, "reward": 1.9270833432674408, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.9270833432674408, "rewards/CineAccuracyORM/std": 0.16161249950528145, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1627, "train_speed(iter/s)": 0.029547 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.75, "completions/mean_length": 91.73958587646484, "completions/min_length": 51.0, "epoch": 2.4259121370067014, "grad_norm": 1.0399232624557362, "kl": 0.3583984375, "learning_rate": 8.699539886313046e-07, "loss": 0.005613978486508131, "memory(GiB)": 112.53, "reward": 1.604166716337204, "reward_std": 0.08311937749385834, "rewards/CineAccuracyORM/mean": 0.604166679084301, "rewards/CineAccuracyORM/std": 0.4717390313744545, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1628, "train_speed(iter/s)": 0.029543 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.0, "completions/mean_length": 97.60416984558105, "completions/min_length": 60.5, "epoch": 2.4274013402829486, "grad_norm": 1.2794586175529445, "kl": 0.333984375, "learning_rate": 8.697948555035598e-07, "loss": 0.013731738552451134, "memory(GiB)": 112.53, "reward": 1.7291666865348816, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.729166679084301, "rewards/CineAccuracyORM/std": 0.32092025876045227, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1629, "train_speed(iter/s)": 0.029539 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.5, "completions/mean_length": 94.84375190734863, "completions/min_length": 56.25, "epoch": 2.428890543559196, "grad_norm": 0.8210232219188092, "kl": 0.34130859375, "learning_rate": 8.696356396458464e-07, "loss": 0.010570462793111801, "memory(GiB)": 112.53, "reward": 1.5520833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.5520833432674408, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1630, "train_speed(iter/s)": 0.029535 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.5, "completions/mean_length": 96.29166984558105, "completions/min_length": 59.25, "epoch": 2.430379746835443, "grad_norm": 0.004844465155684671, "kl": 0.32373046875, "learning_rate": 8.69476341093784e-07, "loss": 0.00032406821264885366, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1631, "train_speed(iter/s)": 0.029536 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.0, "completions/mean_length": 92.01041984558105, "completions/min_length": 48.5, "epoch": 2.4318689501116904, "grad_norm": 0.733670930931743, "kl": 0.33251953125, "learning_rate": 8.693169598830103e-07, "loss": -0.0010108796413987875, "memory(GiB)": 112.53, "reward": 1.822916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1632, "train_speed(iter/s)": 0.029527 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.25, "completions/mean_length": 97.82291984558105, "completions/min_length": 55.0, "epoch": 2.4333581533879376, "grad_norm": 0.004333313247139547, "kl": 0.353515625, "learning_rate": 8.691574960491823e-07, "loss": 0.0003538720484357327, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1633, "train_speed(iter/s)": 0.029518 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.5, "completions/mean_length": 95.83333587646484, "completions/min_length": 56.75, "epoch": 2.434847356664185, "grad_norm": 0.004725202925944114, "kl": 0.3232421875, "learning_rate": 8.689979496279746e-07, "loss": 0.00032410427229478955, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1634, "train_speed(iter/s)": 0.029523 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.25, "completions/mean_length": 91.18750190734863, "completions/min_length": 50.75, "epoch": 2.4363365599404316, "grad_norm": 0.004133816641844492, "kl": 0.326171875, "learning_rate": 8.688383206550809e-07, "loss": 0.0003264064434915781, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1635, "train_speed(iter/s)": 0.029525 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.25, "completions/mean_length": 90.71875190734863, "completions/min_length": 51.25, "epoch": 2.4378257632166793, "grad_norm": 0.004186157959164437, "kl": 0.3408203125, "learning_rate": 8.68678609166213e-07, "loss": 0.00034103565849363804, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1636, "train_speed(iter/s)": 0.029526 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.5, "completions/mean_length": 99.71875381469727, "completions/min_length": 45.5, "epoch": 2.439314966492926, "grad_norm": 0.6802259985455492, "kl": 0.33154296875, "learning_rate": 8.685188151971017e-07, "loss": -0.008271575905382633, "memory(GiB)": 112.53, "reward": 1.8125000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1637, "train_speed(iter/s)": 0.029531 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.5, "completions/mean_length": 88.04166793823242, "completions/min_length": 50.0, "epoch": 2.4408041697691734, "grad_norm": 0.8649825947042947, "kl": 0.3544921875, "learning_rate": 8.683589387834956e-07, "loss": 0.0180269293487072, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.822916679084301, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1638, "train_speed(iter/s)": 0.029533 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.0, "completions/mean_length": 93.18750190734863, "completions/min_length": 48.75, "epoch": 2.4422933730454206, "grad_norm": 1.638203042268074, "kl": 0.34814453125, "learning_rate": 8.681989799611618e-07, "loss": 0.018133794888854027, "memory(GiB)": 112.53, "reward": 1.7083333730697632, "reward_std": 0.09750140458345413, "rewards/CineAccuracyORM/mean": 0.7083333656191826, "rewards/CineAccuracyORM/std": 0.3483504578471184, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1639, "train_speed(iter/s)": 0.029533 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.5, "completions/mean_length": 95.93750381469727, "completions/min_length": 54.5, "epoch": 2.443782576321668, "grad_norm": 1.190628944027524, "kl": 0.32666015625, "learning_rate": 8.680389387658866e-07, "loss": -0.0020008310675621033, "memory(GiB)": 112.53, "reward": 1.8125, "reward_std": 0.08311938121914864, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.23826994746923447, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1640, "train_speed(iter/s)": 0.029532 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.5, "completions/mean_length": 97.90625381469727, "completions/min_length": 56.25, "epoch": 2.445271779597915, "grad_norm": 1.648425857574827, "kl": 0.3173828125, "learning_rate": 8.678788152334736e-07, "loss": 0.024816324934363365, "memory(GiB)": 112.53, "reward": 1.5208333730697632, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.5208333432674408, "rewards/CineAccuracyORM/std": 0.49164988845586777, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1641, "train_speed(iter/s)": 0.029526 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.0, "completions/mean_length": 90.60416984558105, "completions/min_length": 45.0, "epoch": 2.4467609828741623, "grad_norm": 1.4611061762907087, "kl": 0.3505859375, "learning_rate": 8.677186093997457e-07, "loss": -0.013145575299859047, "memory(GiB)": 112.53, "reward": 1.84375, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.84375, "rewards/CineAccuracyORM/std": 0.23421530425548553, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1642, "train_speed(iter/s)": 0.029524 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.75, "completions/mean_length": 88.69791793823242, "completions/min_length": 53.25, "epoch": 2.4482501861504096, "grad_norm": 0.7069312755302244, "kl": 0.3603515625, "learning_rate": 8.675583213005442e-07, "loss": -0.004668531008064747, "memory(GiB)": 112.53, "reward": 1.8958333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1643, "train_speed(iter/s)": 0.029527 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.0, "completions/mean_length": 91.89583778381348, "completions/min_length": 51.0, "epoch": 2.449739389426657, "grad_norm": 0.004694271429991134, "kl": 0.341796875, "learning_rate": 8.67397950971728e-07, "loss": 0.00034154555760324, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1644, "train_speed(iter/s)": 0.029527 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.25, "completions/mean_length": 95.32291793823242, "completions/min_length": 58.5, "epoch": 2.451228592702904, "grad_norm": 1.1031701505709373, "kl": 0.33154296875, "learning_rate": 8.67237498449175e-07, "loss": 0.025250021368265152, "memory(GiB)": 112.53, "reward": 1.6354166865348816, "reward_std": 0.07400632463395596, "rewards/CineAccuracyORM/mean": 0.6354166716337204, "rewards/CineAccuracyORM/std": 0.3717081770300865, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1645, "train_speed(iter/s)": 0.029533 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.0, "completions/mean_length": 92.02083587646484, "completions/min_length": 48.0, "epoch": 2.4527177959791513, "grad_norm": 0.004457962119857465, "kl": 0.3427734375, "learning_rate": 8.670769637687818e-07, "loss": 0.0003427794435992837, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1646, "train_speed(iter/s)": 0.029534 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.0, "completions/mean_length": 92.64583587646484, "completions/min_length": 55.0, "epoch": 2.4542069992553985, "grad_norm": 1.3722265266921583, "kl": 0.36376953125, "learning_rate": 8.669163469664627e-07, "loss": 0.007231011055409908, "memory(GiB)": 112.53, "reward": 1.8437500596046448, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.2946811020374298, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1647, "train_speed(iter/s)": 0.029526 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.0, "completions/mean_length": 95.92708587646484, "completions/min_length": 54.5, "epoch": 2.4556962025316453, "grad_norm": 0.0046613042390663394, "kl": 0.35205078125, "learning_rate": 8.667556480781506e-07, "loss": 0.00035252628731541336, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1648, "train_speed(iter/s)": 0.029517 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.0, "completions/mean_length": 91.50000190734863, "completions/min_length": 54.75, "epoch": 2.457185405807893, "grad_norm": 1.335350592284833, "kl": 0.33740234375, "learning_rate": 8.665948671397969e-07, "loss": 0.006418577395379543, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.7708333507180214, "rewards/CineAccuracyORM/std": 0.2986612282693386, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1649, "train_speed(iter/s)": 0.029517 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.25, "completions/mean_length": 92.17708587646484, "completions/min_length": 53.75, "epoch": 2.45867460908414, "grad_norm": 1.608316530630696, "kl": 0.34375, "learning_rate": 8.664340041873715e-07, "loss": -0.02002488449215889, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.08311938121914864, "rewards/CineAccuracyORM/mean": 0.7708333507180214, "rewards/CineAccuracyORM/std": 0.28614169359207153, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1650, "train_speed(iter/s)": 0.029518 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.0, "completions/mean_length": 100.31250190734863, "completions/min_length": 49.5, "epoch": 2.460163812360387, "grad_norm": 0.0049218853341038695, "kl": 0.3212890625, "learning_rate": 8.662730592568619e-07, "loss": 0.0003217277699150145, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1651, "train_speed(iter/s)": 0.029514 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.25, "completions/mean_length": 83.70833587646484, "completions/min_length": 45.75, "epoch": 2.4616530156366343, "grad_norm": 0.0053786652169327195, "kl": 0.35302734375, "learning_rate": 8.66112032384275e-07, "loss": 0.00035274820402264595, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1652, "train_speed(iter/s)": 0.02952 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.0, "completions/mean_length": 92.80208587646484, "completions/min_length": 50.75, "epoch": 2.4631422189128815, "grad_norm": 0.9048775805300392, "kl": 0.33544921875, "learning_rate": 8.659509236056352e-07, "loss": -0.004445429891347885, "memory(GiB)": 112.53, "reward": 1.760416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166865348816, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1653, "train_speed(iter/s)": 0.029516 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 184.25, "completions/mean_length": 98.63541793823242, "completions/min_length": 57.75, "epoch": 2.464631422189129, "grad_norm": 0.6703891338854494, "kl": 0.34033203125, "learning_rate": 8.657897329569856e-07, "loss": 0.008724083192646503, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1654, "train_speed(iter/s)": 0.029514 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.75, "completions/mean_length": 90.00000381469727, "completions/min_length": 51.5, "epoch": 2.466120625465376, "grad_norm": 0.004720795956077126, "kl": 0.3466796875, "learning_rate": 8.656284604743875e-07, "loss": 0.00034624425461515784, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1655, "train_speed(iter/s)": 0.02951 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.5, "completions/mean_length": 94.52083587646484, "completions/min_length": 61.25, "epoch": 2.4676098287416233, "grad_norm": 1.8684397368039263, "kl": 0.3486328125, "learning_rate": 8.654671061939207e-07, "loss": -0.021521015092730522, "memory(GiB)": 112.53, "reward": 1.6145833730697632, "reward_std": 0.102054663002491, "rewards/CineAccuracyORM/mean": 0.6145833507180214, "rewards/CineAccuracyORM/std": 0.48734044283628464, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1656, "train_speed(iter/s)": 0.029506 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.25, "completions/mean_length": 97.35416984558105, "completions/min_length": 55.5, "epoch": 2.4690990320178705, "grad_norm": 0.00548844732459554, "kl": 0.34130859375, "learning_rate": 8.65305670151683e-07, "loss": 0.000341268430929631, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1657, "train_speed(iter/s)": 0.029498 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.0, "completions/mean_length": 96.95833587646484, "completions/min_length": 50.75, "epoch": 2.4705882352941178, "grad_norm": 0.0051539338233843705, "kl": 0.3408203125, "learning_rate": 8.651441523837907e-07, "loss": 0.00034048163797706366, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1658, "train_speed(iter/s)": 0.029503 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.5, "completions/mean_length": 95.65625190734863, "completions/min_length": 53.0, "epoch": 2.472077438570365, "grad_norm": 0.005368461872058457, "kl": 0.3525390625, "learning_rate": 8.649825529263782e-07, "loss": 0.00035276816925033927, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1659, "train_speed(iter/s)": 0.029509 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.75, "completions/mean_length": 103.87500381469727, "completions/min_length": 51.0, "epoch": 2.4735666418466122, "grad_norm": 1.4396779991768902, "kl": 0.33837890625, "learning_rate": 8.648208718155985e-07, "loss": -0.010052226483821869, "memory(GiB)": 112.53, "reward": 1.5937500596046448, "reward_std": 0.12028077617287636, "rewards/CineAccuracyORM/mean": 0.5937500149011612, "rewards/CineAccuracyORM/std": 0.46742958575487137, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1660, "train_speed(iter/s)": 0.029499 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.25, "completions/mean_length": 89.65625190734863, "completions/min_length": 52.25, "epoch": 2.475055845122859, "grad_norm": 1.2607792575403793, "kl": 0.33837890625, "learning_rate": 8.646591090876224e-07, "loss": -0.012349891476333141, "memory(GiB)": 112.53, "reward": 1.5729167461395264, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.5729166865348816, "rewards/CineAccuracyORM/std": 0.4939185827970505, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1661, "train_speed(iter/s)": 0.0295 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 186.0, "completions/mean_length": 100.66666984558105, "completions/min_length": 51.0, "epoch": 2.4765450483991067, "grad_norm": 0.9062587024247396, "kl": 0.3193359375, "learning_rate": 8.644972647786395e-07, "loss": 0.012814681977033615, "memory(GiB)": 112.53, "reward": 1.7395834028720856, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.739583358168602, "rewards/CineAccuracyORM/std": 0.4121886007487774, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1662, "train_speed(iter/s)": 0.0295 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.5, "completions/mean_length": 94.51041793823242, "completions/min_length": 49.75, "epoch": 2.4780342516753535, "grad_norm": 0.004456433951865957, "kl": 0.3388671875, "learning_rate": 8.643353389248572e-07, "loss": 0.00033903063740581274, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1663, "train_speed(iter/s)": 0.029491 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.5, "completions/mean_length": 93.22916793823242, "completions/min_length": 53.75, "epoch": 2.4795234549516008, "grad_norm": 0.8779657855328049, "kl": 0.3388671875, "learning_rate": 8.641733315625014e-07, "loss": 0.016679806634783745, "memory(GiB)": 112.53, "reward": 1.5520833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.5520833432674408, "rewards/CineAccuracyORM/std": 0.34448449313640594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1664, "train_speed(iter/s)": 0.029491 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.25, "completions/mean_length": 91.37500190734863, "completions/min_length": 59.5, "epoch": 2.481012658227848, "grad_norm": 1.0920779859378615, "kl": 0.330078125, "learning_rate": 8.640112427278162e-07, "loss": -0.003819905687123537, "memory(GiB)": 112.53, "reward": 1.729166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7291666865348816, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1665, "train_speed(iter/s)": 0.029497 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.0, "completions/mean_length": 101.44791793823242, "completions/min_length": 53.5, "epoch": 2.4825018615040952, "grad_norm": 0.00497955788933875, "kl": 0.341796875, "learning_rate": 8.638490724570635e-07, "loss": 0.0003409069904591888, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1666, "train_speed(iter/s)": 0.029493 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.25, "completions/mean_length": 93.64583587646484, "completions/min_length": 53.25, "epoch": 2.4839910647803425, "grad_norm": 1.1036959775361157, "kl": 0.35302734375, "learning_rate": 8.636868207865243e-07, "loss": 0.0013946108520030975, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1667, "train_speed(iter/s)": 0.029484 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.25, "completions/mean_length": 98.77083587646484, "completions/min_length": 56.75, "epoch": 2.4854802680565897, "grad_norm": 0.0054494441449126525, "kl": 0.33984375, "learning_rate": 8.63524487752497e-07, "loss": 0.0003402509610168636, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1668, "train_speed(iter/s)": 0.02949 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.75, "completions/mean_length": 90.45833587646484, "completions/min_length": 46.75, "epoch": 2.486969471332837, "grad_norm": 0.776938581931578, "kl": 0.33642578125, "learning_rate": 8.633620733912988e-07, "loss": 0.007475101388990879, "memory(GiB)": 112.53, "reward": 1.6250000298023224, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.6250000074505806, "rewards/CineAccuracyORM/std": 0.36846019327640533, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1669, "train_speed(iter/s)": 0.029488 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.5, "completions/mean_length": 89.85416984558105, "completions/min_length": 46.75, "epoch": 2.488458674609084, "grad_norm": 0.9474713479598018, "kl": 0.35888671875, "learning_rate": 8.631995777392644e-07, "loss": 0.002926829969510436, "memory(GiB)": 112.53, "reward": 1.760416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166865348816, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1670, "train_speed(iter/s)": 0.029484 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.75, "completions/mean_length": 90.97916793823242, "completions/min_length": 49.0, "epoch": 2.4899478778853315, "grad_norm": 0.004729096953718126, "kl": 0.33935546875, "learning_rate": 8.630370008327474e-07, "loss": 0.00033918709959834814, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1671, "train_speed(iter/s)": 0.029475 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.0, "completions/mean_length": 96.64583587646484, "completions/min_length": 52.25, "epoch": 2.4914370811615787, "grad_norm": 0.0052096034105043425, "kl": 0.33203125, "learning_rate": 8.628743427081194e-07, "loss": 0.0003323142300359905, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1672, "train_speed(iter/s)": 0.029476 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 194.0, "completions/mean_length": 96.44791984558105, "completions/min_length": 53.25, "epoch": 2.492926284437826, "grad_norm": 0.00535442675173719, "kl": 0.32470703125, "learning_rate": 8.627116034017695e-07, "loss": 0.0003244643739890307, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1673, "train_speed(iter/s)": 0.029477 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.75, "completions/mean_length": 94.09375381469727, "completions/min_length": 51.5, "epoch": 2.4944154877140727, "grad_norm": 0.004321789805558355, "kl": 0.33349609375, "learning_rate": 8.625487829501059e-07, "loss": 0.0003335962537676096, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1674, "train_speed(iter/s)": 0.029478 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.75, "completions/mean_length": 89.42708778381348, "completions/min_length": 51.25, "epoch": 2.4959046909903204, "grad_norm": 0.7344244475629865, "kl": 0.3515625, "learning_rate": 8.623858813895546e-07, "loss": 0.007024690508842468, "memory(GiB)": 112.53, "reward": 1.5312500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.5312500149011612, "rewards/CineAccuracyORM/std": 0.4884019047021866, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1675, "train_speed(iter/s)": 0.029483 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.0, "completions/mean_length": 94.55208587646484, "completions/min_length": 40.75, "epoch": 2.497393894266567, "grad_norm": 0.5310297435633441, "kl": 0.3310546875, "learning_rate": 8.622228987565595e-07, "loss": -0.009487763047218323, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1676, "train_speed(iter/s)": 0.029478 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.25, "completions/mean_length": 89.37500381469727, "completions/min_length": 52.75, "epoch": 2.4988830975428145, "grad_norm": 0.960601232716663, "kl": 0.365234375, "learning_rate": 8.620598350875829e-07, "loss": -0.0019414023263379931, "memory(GiB)": 112.53, "reward": 1.8125000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000074505806, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1677, "train_speed(iter/s)": 0.029479 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.25, "completions/mean_length": 91.57291793823242, "completions/min_length": 55.5, "epoch": 2.5003723008190617, "grad_norm": 1.2389843372770681, "kl": 0.345703125, "learning_rate": 8.618966904191051e-07, "loss": -0.012498533353209496, "memory(GiB)": 112.53, "reward": 1.8854166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.20484384894371033, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1678, "train_speed(iter/s)": 0.029485 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.5, "completions/mean_length": 93.50000190734863, "completions/min_length": 51.75, "epoch": 2.501861504095309, "grad_norm": 1.0719820637218278, "kl": 0.3642578125, "learning_rate": 8.617334647876249e-07, "loss": -0.00396999204531312, "memory(GiB)": 112.53, "reward": 1.729166716337204, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.7291666865348816, "rewards/CineAccuracyORM/std": 0.2874932996928692, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1679, "train_speed(iter/s)": 0.02949 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.25, "completions/mean_length": 95.98958587646484, "completions/min_length": 45.25, "epoch": 2.503350707371556, "grad_norm": 1.517913241261941, "kl": 0.3486328125, "learning_rate": 8.615701582296584e-07, "loss": -0.01263434998691082, "memory(GiB)": 112.53, "reward": 1.8645833730697632, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.8645833432674408, "rewards/CineAccuracyORM/std": 0.1978268027305603, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1680, "train_speed(iter/s)": 0.029495 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 181.0, "completions/mean_length": 98.17708396911621, "completions/min_length": 47.25, "epoch": 2.5048399106478034, "grad_norm": 2.448043610916796, "kl": 0.34375, "learning_rate": 8.614067707817407e-07, "loss": 0.007951153442263603, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.14518376626074314, "rewards/CineAccuracyORM/mean": 0.5000000186264515, "rewards/CineAccuracyORM/std": 0.37030304595828056, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1681, "train_speed(iter/s)": 0.02949 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 179.0, "completions/mean_length": 96.68750190734863, "completions/min_length": 48.0, "epoch": 2.5063291139240507, "grad_norm": 0.005166190290549877, "kl": 0.326171875, "learning_rate": 8.612433024804246e-07, "loss": 0.00032602535793557763, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1682, "train_speed(iter/s)": 0.029482 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 220.75, "completions/mean_length": 103.36458587646484, "completions/min_length": 57.0, "epoch": 2.507818317200298, "grad_norm": 1.1044479487275742, "kl": 0.31103515625, "learning_rate": 8.610797533622807e-07, "loss": 0.010477914474904537, "memory(GiB)": 112.53, "reward": 1.8125000596046448, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.8125000298023224, "rewards/CineAccuracyORM/std": 0.3428337797522545, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1683, "train_speed(iter/s)": 0.029482 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.5, "completions/mean_length": 98.23958778381348, "completions/min_length": 52.75, "epoch": 2.509307520476545, "grad_norm": 0.9797127063118867, "kl": 0.33203125, "learning_rate": 8.609161234638982e-07, "loss": -0.0019111910369247198, "memory(GiB)": 112.53, "reward": 1.5625000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.5625000149011612, "rewards/CineAccuracyORM/std": 0.43174003064632416, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1684, "train_speed(iter/s)": 0.029478 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.0, "completions/mean_length": 101.42708587646484, "completions/min_length": 56.5, "epoch": 2.5107967237527924, "grad_norm": 0.00499193784704934, "kl": 0.31494140625, "learning_rate": 8.607524128218842e-07, "loss": 0.00031495350413024426, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1685, "train_speed(iter/s)": 0.029479 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.75, "completions/mean_length": 95.94791984558105, "completions/min_length": 53.75, "epoch": 2.5122859270290396, "grad_norm": 0.004402945650934066, "kl": 0.328125, "learning_rate": 8.605886214728635e-07, "loss": 0.0003280765376985073, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1686, "train_speed(iter/s)": 0.02948 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.5, "completions/mean_length": 96.0625, "completions/min_length": 45.5, "epoch": 2.5137751303052864, "grad_norm": 0.866394981360421, "kl": 0.3232421875, "learning_rate": 8.604247494534796e-07, "loss": 0.005721153225749731, "memory(GiB)": 112.53, "reward": 1.5312500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.5312500074505806, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1687, "train_speed(iter/s)": 0.029485 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.0, "completions/mean_length": 105.51041793823242, "completions/min_length": 60.0, "epoch": 2.515264333581534, "grad_norm": 1.2127702582303848, "kl": 0.3095703125, "learning_rate": 8.602607968003934e-07, "loss": 0.017278539016842842, "memory(GiB)": 112.53, "reward": 1.729166716337204, "reward_std": 0.08311938121914864, "rewards/CineAccuracyORM/mean": 0.7291666865348816, "rewards/CineAccuracyORM/std": 0.34146176278591156, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1688, "train_speed(iter/s)": 0.029491 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.25, "completions/mean_length": 96.34375190734863, "completions/min_length": 51.0, "epoch": 2.516753536857781, "grad_norm": 1.0632158226994888, "kl": 0.31591796875, "learning_rate": 8.600967635502845e-07, "loss": -0.0044239661656320095, "memory(GiB)": 112.53, "reward": 1.7187500298023224, "reward_std": 0.07400631904602051, "rewards/CineAccuracyORM/mean": 0.7187500074505806, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1689, "train_speed(iter/s)": 0.029496 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.0, "completions/mean_length": 98.75000190734863, "completions/min_length": 53.5, "epoch": 2.518242740134028, "grad_norm": 0.004557750586258749, "kl": 0.31689453125, "learning_rate": 8.5993264973985e-07, "loss": 0.00031597758061252534, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1690, "train_speed(iter/s)": 0.029502 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.75, "completions/mean_length": 93.97916984558105, "completions/min_length": 47.75, "epoch": 2.5197319434102754, "grad_norm": 1.0917660095737227, "kl": 0.3515625, "learning_rate": 8.597684554058052e-07, "loss": -0.008108219131827354, "memory(GiB)": 112.53, "reward": 1.729166716337204, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.7291666865348816, "rewards/CineAccuracyORM/std": 0.4154365845024586, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1691, "train_speed(iter/s)": 0.029498 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.75, "completions/mean_length": 97.10416984558105, "completions/min_length": 58.0, "epoch": 2.5212211466865226, "grad_norm": 2.103002195556027, "kl": 0.33203125, "learning_rate": 8.596041805848833e-07, "loss": 0.00905697327107191, "memory(GiB)": 112.53, "reward": 1.9270833432674408, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.9270833432674408, "rewards/CineAccuracyORM/std": 0.11607640981674194, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1692, "train_speed(iter/s)": 0.029503 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 181.0, "completions/mean_length": 104.58333587646484, "completions/min_length": 49.0, "epoch": 2.52271034996277, "grad_norm": 0.004654278901213265, "kl": 0.29736328125, "learning_rate": 8.594398253138358e-07, "loss": 0.00029758340679109097, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1693, "train_speed(iter/s)": 0.029501 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.25, "completions/mean_length": 100.27083587646484, "completions/min_length": 47.0, "epoch": 2.524199553239017, "grad_norm": 0.7755528550940316, "kl": 0.33203125, "learning_rate": 8.592753896294319e-07, "loss": 0.019456911832094193, "memory(GiB)": 112.53, "reward": 1.510416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5104166865348816, "rewards/CineAccuracyORM/std": 0.47723397612571716, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1694, "train_speed(iter/s)": 0.029501 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.0, "completions/mean_length": 101.92708587646484, "completions/min_length": 47.25, "epoch": 2.5256887565152644, "grad_norm": 0.8539133207350109, "kl": 0.3056640625, "learning_rate": 8.591108735684592e-07, "loss": -0.002041054889559746, "memory(GiB)": 112.53, "reward": 1.8125000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1695, "train_speed(iter/s)": 0.029502 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 195.75, "completions/mean_length": 106.87500190734863, "completions/min_length": 55.5, "epoch": 2.5271779597915116, "grad_norm": 0.005105613632917418, "kl": 0.31494140625, "learning_rate": 8.589462771677225e-07, "loss": 0.0003148532996419817, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1696, "train_speed(iter/s)": 0.029498 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 179.25, "completions/mean_length": 97.42708587646484, "completions/min_length": 48.0, "epoch": 2.528667163067759, "grad_norm": 0.0045933255012005555, "kl": 0.318359375, "learning_rate": 8.587816004640456e-07, "loss": 0.0003184863307978958, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1697, "train_speed(iter/s)": 0.029498 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.5, "completions/mean_length": 88.22916984558105, "completions/min_length": 42.0, "epoch": 2.530156366344006, "grad_norm": 1.1205552019309268, "kl": 0.36083984375, "learning_rate": 8.586168434942694e-07, "loss": -0.005316486116498709, "memory(GiB)": 112.53, "reward": 1.729166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7291666865348816, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1698, "train_speed(iter/s)": 0.029493 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.75, "completions/mean_length": 101.58333778381348, "completions/min_length": 53.0, "epoch": 2.5316455696202533, "grad_norm": 1.0506554579001366, "kl": 0.2998046875, "learning_rate": 8.584520062952529e-07, "loss": -0.009253047406673431, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833507180214, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1699, "train_speed(iter/s)": 0.029493 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 199.75, "completions/mean_length": 104.28125381469727, "completions/min_length": 52.0, "epoch": 2.5331347728965, "grad_norm": 2.444075906037808, "kl": 0.31689453125, "learning_rate": 8.582870889038738e-07, "loss": 0.010359223932027817, "memory(GiB)": 112.53, "reward": 1.6875000298023224, "reward_std": 0.16937757655978203, "rewards/CineAccuracyORM/mean": 0.6875000149011612, "rewards/CineAccuracyORM/std": 0.3261406943202019, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1700, "train_speed(iter/s)": 0.029492 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.5, "completions/mean_length": 92.23958587646484, "completions/min_length": 39.0, "epoch": 2.534623976172748, "grad_norm": 1.442918723466826, "kl": 0.3291015625, "learning_rate": 8.581220913570268e-07, "loss": -0.0012646907707676291, "memory(GiB)": 112.53, "reward": 1.8750000298023224, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.8750000149011612, "rewards/CineAccuracyORM/std": 0.2558748833835125, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1701, "train_speed(iter/s)": 0.029494 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.25, "completions/mean_length": 91.51041984558105, "completions/min_length": 49.5, "epoch": 2.5361131794489946, "grad_norm": 1.787360752955143, "kl": 0.34130859375, "learning_rate": 8.57957013691625e-07, "loss": 0.014464771375060081, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.3477324768900871, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1702, "train_speed(iter/s)": 0.029494 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.75, "completions/mean_length": 100.62500381469727, "completions/min_length": 42.0, "epoch": 2.537602382725242, "grad_norm": 0.004965792634370702, "kl": 0.30712890625, "learning_rate": 8.577918559445993e-07, "loss": 0.0003068475052714348, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1703, "train_speed(iter/s)": 0.029493 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.0, "completions/mean_length": 96.17708587646484, "completions/min_length": 49.5, "epoch": 2.539091586001489, "grad_norm": 1.2614631965715373, "kl": 0.30078125, "learning_rate": 8.576266181528987e-07, "loss": 0.010218371637165546, "memory(GiB)": 112.53, "reward": 1.6041667461395264, "reward_std": 0.08625819534063339, "rewards/CineAccuracyORM/mean": 0.6041666865348816, "rewards/CineAccuracyORM/std": 0.4952603876590729, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1704, "train_speed(iter/s)": 0.029491 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.0, "completions/mean_length": 107.65625190734863, "completions/min_length": 39.5, "epoch": 2.5405807892777363, "grad_norm": 0.004761900651841552, "kl": 0.2958984375, "learning_rate": 8.574613003534899e-07, "loss": 0.0002960842102766037, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1705, "train_speed(iter/s)": 0.029486 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.0, "completions/mean_length": 92.75000190734863, "completions/min_length": 44.5, "epoch": 2.5420699925539836, "grad_norm": 0.8572240226360328, "kl": 0.29150390625, "learning_rate": 8.572959025833573e-07, "loss": 0.023115983232855797, "memory(GiB)": 112.53, "reward": 1.8645833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8645833432674408, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1706, "train_speed(iter/s)": 0.029487 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.0, "completions/mean_length": 93.95833778381348, "completions/min_length": 41.75, "epoch": 2.543559195830231, "grad_norm": 1.1840811185042162, "kl": 0.306640625, "learning_rate": 8.571304248795037e-07, "loss": 0.0009705245029181242, "memory(GiB)": 112.53, "reward": 1.6250000298023224, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.6250000149011612, "rewards/CineAccuracyORM/std": 0.33594508469104767, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1707, "train_speed(iter/s)": 0.029479 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.75, "completions/mean_length": 93.68750190734863, "completions/min_length": 59.5, "epoch": 2.545048399106478, "grad_norm": 0.004878088100566744, "kl": 0.3037109375, "learning_rate": 8.569648672789496e-07, "loss": 0.00030358636286109686, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1708, "train_speed(iter/s)": 0.029481 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.5, "completions/mean_length": 91.16666793823242, "completions/min_length": 39.5, "epoch": 2.5465376023827253, "grad_norm": 0.0062085691605256906, "kl": 0.31298828125, "learning_rate": 8.56799229818733e-07, "loss": 0.00031286748708225787, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1709, "train_speed(iter/s)": 0.029482 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.25, "completions/mean_length": 98.75000381469727, "completions/min_length": 47.25, "epoch": 2.5480268056589725, "grad_norm": 0.8578977070373511, "kl": 0.30810546875, "learning_rate": 8.566335125359103e-07, "loss": 0.006736219860613346, "memory(GiB)": 112.53, "reward": 1.5312500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.5312500149011612, "rewards/CineAccuracyORM/std": 0.4456155598163605, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1710, "train_speed(iter/s)": 0.029486 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.5, "completions/mean_length": 90.66666793823242, "completions/min_length": 36.75, "epoch": 2.54951600893522, "grad_norm": 2.2125705718200974, "kl": 0.3193359375, "learning_rate": 8.564677154675557e-07, "loss": -0.004119519144296646, "memory(GiB)": 112.53, "reward": 1.7708333432674408, "reward_std": 0.09750140458345413, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.3502917140722275, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1711, "train_speed(iter/s)": 0.029482 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.25, "completions/mean_length": 87.43750381469727, "completions/min_length": 41.75, "epoch": 2.551005212211467, "grad_norm": 0.005619069388629602, "kl": 0.3271484375, "learning_rate": 8.563018386507607e-07, "loss": 0.0003270791785325855, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1712, "train_speed(iter/s)": 0.029483 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.75, "completions/mean_length": 95.00000190734863, "completions/min_length": 38.0, "epoch": 2.552494415487714, "grad_norm": 0.8258310386618168, "kl": 0.33349609375, "learning_rate": 8.561358821226352e-07, "loss": 0.005548170767724514, "memory(GiB)": 112.53, "reward": 1.7187500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500149011612, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1713, "train_speed(iter/s)": 0.029483 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.25, "completions/mean_length": 90.65625190734863, "completions/min_length": 39.0, "epoch": 2.5539836187639615, "grad_norm": 0.843208501988133, "kl": 0.3125, "learning_rate": 8.559698459203067e-07, "loss": 0.004169571213424206, "memory(GiB)": 112.53, "reward": 1.5937500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500074505806, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1714, "train_speed(iter/s)": 0.029479 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.75, "completions/mean_length": 96.00000190734863, "completions/min_length": 39.0, "epoch": 2.5554728220402083, "grad_norm": 0.005201866253454655, "kl": 0.3173828125, "learning_rate": 8.558037300809208e-07, "loss": 0.00031690922332927585, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1715, "train_speed(iter/s)": 0.029479 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.5, "completions/mean_length": 93.18750381469727, "completions/min_length": 40.25, "epoch": 2.5569620253164556, "grad_norm": 0.004975184866492803, "kl": 0.31982421875, "learning_rate": 8.556375346416404e-07, "loss": 0.0003188096161466092, "memory(GiB)": 112.53, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1716, "train_speed(iter/s)": 0.029476 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.25, "completions/mean_length": 96.72916984558105, "completions/min_length": 41.25, "epoch": 2.558451228592703, "grad_norm": 1.6053416289917974, "kl": 0.30615234375, "learning_rate": 8.554712596396466e-07, "loss": -0.015389548614621162, "memory(GiB)": 112.53, "reward": 1.760416716337204, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.7604166865348816, "rewards/CineAccuracyORM/std": 0.35000117123126984, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1717, "train_speed(iter/s)": 0.029478 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 141.5, "completions/mean_length": 83.06250190734863, "completions/min_length": 40.25, "epoch": 2.55994043186895, "grad_norm": 0.005190161661769345, "kl": 0.34228515625, "learning_rate": 8.553049051121382e-07, "loss": 0.0003417425323277712, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1718, "train_speed(iter/s)": 0.029482 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.0, "completions/mean_length": 92.65625381469727, "completions/min_length": 41.75, "epoch": 2.5614296351451973, "grad_norm": 1.285476279975958, "kl": 0.3046875, "learning_rate": 8.551384710963319e-07, "loss": -0.009691545739769936, "memory(GiB)": 112.53, "reward": 1.8958333730697632, "reward_std": 0.08625819534063339, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.2074255645275116, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1719, "train_speed(iter/s)": 0.029479 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.25, "completions/mean_length": 91.42708587646484, "completions/min_length": 41.25, "epoch": 2.5629188384214445, "grad_norm": 0.004814794105582469, "kl": 0.32958984375, "learning_rate": 8.54971957629462e-07, "loss": 0.00032984319841489196, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1720, "train_speed(iter/s)": 0.029479 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.0, "completions/mean_length": 93.54166984558105, "completions/min_length": 50.75, "epoch": 2.5644080416976918, "grad_norm": 0.0053804945479116306, "kl": 0.302001953125, "learning_rate": 8.548053647487807e-07, "loss": 0.000301800318993628, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1721, "train_speed(iter/s)": 0.029479 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.25, "completions/mean_length": 91.11458587646484, "completions/min_length": 41.75, "epoch": 2.565897244973939, "grad_norm": 0.879249806496835, "kl": 0.3056640625, "learning_rate": 8.546386924915578e-07, "loss": 0.01336782705038786, "memory(GiB)": 112.53, "reward": 1.5312500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.5312500074505806, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1722, "train_speed(iter/s)": 0.029476 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.5, "completions/mean_length": 94.50000381469727, "completions/min_length": 34.75, "epoch": 2.5673864482501862, "grad_norm": 1.5530562275357938, "kl": 0.3203125, "learning_rate": 8.544719408950809e-07, "loss": -0.005420319736003876, "memory(GiB)": 112.53, "reward": 1.6145833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6145833432674408, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1723, "train_speed(iter/s)": 0.029481 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 257.5, "completions/mean_length": 95.07291984558105, "completions/min_length": 43.0, "epoch": 2.5688756515264335, "grad_norm": 1.1185871665422416, "kl": 0.33349609375, "learning_rate": 8.543051099966557e-07, "loss": 0.0029784664511680603, "memory(GiB)": 112.53, "reward": 1.6875000298023224, "reward_std": 0.0725918859243393, "rewards/CineAccuracyORM/mean": 0.6875000074505806, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1724, "train_speed(iter/s)": 0.029475 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.25, "completions/mean_length": 94.86458587646484, "completions/min_length": 34.5, "epoch": 2.5703648548026807, "grad_norm": 0.9573488318648676, "kl": 0.31005859375, "learning_rate": 8.541381998336052e-07, "loss": 0.0017371641006320715, "memory(GiB)": 112.53, "reward": 1.7812500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7812500074505806, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1725, "train_speed(iter/s)": 0.029478 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.5, "completions/mean_length": 87.03125, "completions/min_length": 43.75, "epoch": 2.5718540580789275, "grad_norm": 1.015864384171231, "kl": 0.33203125, "learning_rate": 8.539712104432704e-07, "loss": 0.009724093601107597, "memory(GiB)": 112.53, "reward": 1.96875, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.96875, "rewards/CineAccuracyORM/std": 0.08445799350738525, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1726, "train_speed(iter/s)": 0.029479 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.25, "completions/mean_length": 90.03125381469727, "completions/min_length": 46.75, "epoch": 2.573343261355175, "grad_norm": 1.393822084441426, "kl": 0.341796875, "learning_rate": 8.538041418630098e-07, "loss": 0.00558079406619072, "memory(GiB)": 112.53, "reward": 1.5833333432674408, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.17466487362980843, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1727, "train_speed(iter/s)": 0.029475 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.25, "completions/mean_length": 97.81250190734863, "completions/min_length": 42.0, "epoch": 2.574832464631422, "grad_norm": 0.005562708865117901, "kl": 0.3310546875, "learning_rate": 8.536369941301998e-07, "loss": 0.0003307986189611256, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1728, "train_speed(iter/s)": 0.029476 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 179.5, "completions/mean_length": 91.64583778381348, "completions/min_length": 38.75, "epoch": 2.5763216679076693, "grad_norm": 0.005449447090237561, "kl": 0.32177734375, "learning_rate": 8.534697672822344e-07, "loss": 0.0003221445658709854, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1729, "train_speed(iter/s)": 0.029472 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.0, "completions/mean_length": 95.10416793823242, "completions/min_length": 49.25, "epoch": 2.5778108711839165, "grad_norm": 0.7417667327984022, "kl": 0.3232421875, "learning_rate": 8.533024613565255e-07, "loss": 0.004451867192983627, "memory(GiB)": 112.53, "reward": 1.7812500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7812500074505806, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1730, "train_speed(iter/s)": 0.029462 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.0, "completions/mean_length": 92.79166984558105, "completions/min_length": 45.25, "epoch": 2.5793000744601637, "grad_norm": 0.005853762027743512, "kl": 0.3486328125, "learning_rate": 8.531350763905023e-07, "loss": 0.000349315901985392, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1731, "train_speed(iter/s)": 0.029454 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.0, "completions/mean_length": 99.76041793823242, "completions/min_length": 42.25, "epoch": 2.580789277736411, "grad_norm": 0.0051879343125512895, "kl": 0.314453125, "learning_rate": 8.529676124216122e-07, "loss": 0.0003137330641038716, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1732, "train_speed(iter/s)": 0.029451 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 247.25, "completions/mean_length": 97.79166984558105, "completions/min_length": 46.5, "epoch": 2.5822784810126582, "grad_norm": 1.0833971757516774, "kl": 0.322265625, "learning_rate": 8.528000694873197e-07, "loss": -0.010993830859661102, "memory(GiB)": 112.53, "reward": 1.697916716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.697916679084301, "rewards/CineAccuracyORM/std": 0.34448449313640594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1733, "train_speed(iter/s)": 0.029441 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.75, "completions/mean_length": 93.84375381469727, "completions/min_length": 48.0, "epoch": 2.5837676842889055, "grad_norm": 0.005239810610630194, "kl": 0.31591796875, "learning_rate": 8.526324476251074e-07, "loss": 0.0003163036017213017, "memory(GiB)": 112.53, "reward": 1.5000000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000074505806, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1734, "train_speed(iter/s)": 0.029446 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 194.25, "completions/mean_length": 98.59375381469727, "completions/min_length": 48.5, "epoch": 2.5852568875651527, "grad_norm": 1.0725510708285582, "kl": 0.322265625, "learning_rate": 8.524647468724756e-07, "loss": 0.005042146425694227, "memory(GiB)": 112.53, "reward": 1.7812500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7812500149011612, "rewards/CineAccuracyORM/std": 0.34448449313640594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1735, "train_speed(iter/s)": 0.029441 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.5, "completions/mean_length": 92.19791984558105, "completions/min_length": 41.75, "epoch": 2.5867460908414, "grad_norm": 0.004977012539138615, "kl": 0.32763671875, "learning_rate": 8.522969672669418e-07, "loss": 0.0003274375048931688, "memory(GiB)": 112.53, "reward": 1.5000000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1736, "train_speed(iter/s)": 0.02943 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.75, "completions/mean_length": 96.60416984558105, "completions/min_length": 46.0, "epoch": 2.588235294117647, "grad_norm": 1.0131713762004364, "kl": 0.31494140625, "learning_rate": 8.521291088460416e-07, "loss": -0.01476791501045227, "memory(GiB)": 112.53, "reward": 1.760416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166865348816, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1737, "train_speed(iter/s)": 0.029428 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.5, "completions/mean_length": 95.89583587646484, "completions/min_length": 46.25, "epoch": 2.5897244973938944, "grad_norm": 0.005667901994511007, "kl": 0.3369140625, "learning_rate": 8.519611716473278e-07, "loss": 0.0003373144136276096, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1738, "train_speed(iter/s)": 0.029428 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.25, "completions/mean_length": 89.88541984558105, "completions/min_length": 34.5, "epoch": 2.5912137006701412, "grad_norm": 1.838394517351121, "kl": 0.35400390625, "learning_rate": 8.517931557083713e-07, "loss": 0.022362463176250458, "memory(GiB)": 112.53, "reward": 1.5520833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.5520833432674408, "rewards/CineAccuracyORM/std": 0.34448449313640594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1739, "train_speed(iter/s)": 0.029434 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 181.25, "completions/mean_length": 95.35417175292969, "completions/min_length": 55.25, "epoch": 2.592702903946389, "grad_norm": 0.0059613727856468995, "kl": 0.3330078125, "learning_rate": 8.516250610667604e-07, "loss": 0.0003333811182528734, "memory(GiB)": 112.53, "reward": 1.416666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.416666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1740, "train_speed(iter/s)": 0.029439 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.5, "completions/mean_length": 95.67708587646484, "completions/min_length": 38.75, "epoch": 2.5941921072226357, "grad_norm": 0.0050719891998479705, "kl": 0.31494140625, "learning_rate": 8.514568877601007e-07, "loss": 0.00031492748530581594, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1741, "train_speed(iter/s)": 0.029435 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.75, "completions/mean_length": 96.62500190734863, "completions/min_length": 44.75, "epoch": 2.595681310498883, "grad_norm": 0.935803146612268, "kl": 0.328125, "learning_rate": 8.51288635826016e-07, "loss": -0.006645637564361095, "memory(GiB)": 112.53, "reward": 1.447916716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.447916679084301, "rewards/CineAccuracyORM/std": 0.4456155598163605, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1742, "train_speed(iter/s)": 0.029438 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.0, "completions/mean_length": 91.97916984558105, "completions/min_length": 43.75, "epoch": 2.59717051377513, "grad_norm": 0.033596218466716794, "kl": 0.33447265625, "learning_rate": 8.511203053021472e-07, "loss": 0.0003340620896779001, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1743, "train_speed(iter/s)": 0.029434 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.25, "completions/mean_length": 95.43750190734863, "completions/min_length": 52.0, "epoch": 2.5986597170513774, "grad_norm": 1.0875295482413398, "kl": 0.32080078125, "learning_rate": 8.50951896226153e-07, "loss": 0.015788113698363304, "memory(GiB)": 112.53, "reward": 1.6979166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1744, "train_speed(iter/s)": 0.02943 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.0, "completions/mean_length": 94.19791984558105, "completions/min_length": 38.0, "epoch": 2.6001489203276247, "grad_norm": 1.2011668381489748, "kl": 0.33935546875, "learning_rate": 8.507834086357098e-07, "loss": 0.008495194837450981, "memory(GiB)": 112.53, "reward": 1.5000000298023224, "reward_std": 0.08625819534063339, "rewards/CineAccuracyORM/mean": 0.5000000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1745, "train_speed(iter/s)": 0.029426 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.0, "completions/mean_length": 88.81250190734863, "completions/min_length": 39.5, "epoch": 2.601638123603872, "grad_norm": 1.4085926696949653, "kl": 0.33447265625, "learning_rate": 8.50614842568511e-07, "loss": -0.013081631623208523, "memory(GiB)": 112.53, "reward": 1.8645833730697632, "reward_std": 0.07400632463395596, "rewards/CineAccuracyORM/mean": 0.864583358168602, "rewards/CineAccuracyORM/std": 0.26659026369452477, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1746, "train_speed(iter/s)": 0.029431 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.0, "completions/mean_length": 91.23958587646484, "completions/min_length": 44.25, "epoch": 2.603127326880119, "grad_norm": 0.005521777977631411, "kl": 0.328125, "learning_rate": 8.504461980622684e-07, "loss": 0.00032806210219860077, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1747, "train_speed(iter/s)": 0.029433 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.0, "completions/mean_length": 95.98958396911621, "completions/min_length": 50.0, "epoch": 2.6046165301563664, "grad_norm": 0.6549591021939146, "kl": 0.33984375, "learning_rate": 8.502774751547106e-07, "loss": 0.0068051740527153015, "memory(GiB)": 112.53, "reward": 1.822916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1748, "train_speed(iter/s)": 0.029438 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.75, "completions/mean_length": 94.67708778381348, "completions/min_length": 50.75, "epoch": 2.6061057334326136, "grad_norm": 0.006461734593407767, "kl": 0.3408203125, "learning_rate": 8.501086738835843e-07, "loss": 0.0003402138245292008, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1749, "train_speed(iter/s)": 0.029438 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.5, "completions/mean_length": 89.95833587646484, "completions/min_length": 37.75, "epoch": 2.607594936708861, "grad_norm": 1.2403441846225367, "kl": 0.36767578125, "learning_rate": 8.49939794286653e-07, "loss": 0.005688924342393875, "memory(GiB)": 112.53, "reward": 1.416666716337204, "reward_std": 0.07715167850255966, "rewards/CineAccuracyORM/mean": 0.41666668467223644, "rewards/CineAccuracyORM/std": 0.43725670874118805, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1750, "train_speed(iter/s)": 0.029429 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.0, "completions/mean_length": 94.62500190734863, "completions/min_length": 47.5, "epoch": 2.609084139985108, "grad_norm": 0.6645987823760056, "kl": 0.32568359375, "learning_rate": 8.497708364016989e-07, "loss": 0.004145695362240076, "memory(GiB)": 112.53, "reward": 1.6875000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000149011612, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1751, "train_speed(iter/s)": 0.029426 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 136.0, "completions/mean_length": 83.87500190734863, "completions/min_length": 45.0, "epoch": 2.610573343261355, "grad_norm": 1.2544838139415941, "kl": 0.359375, "learning_rate": 8.496018002665207e-07, "loss": -0.0014536704402416945, "memory(GiB)": 112.53, "reward": 1.7083333730697632, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.7083333507180214, "rewards/CineAccuracyORM/std": 0.3637066036462784, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1752, "train_speed(iter/s)": 0.029427 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.0, "completions/mean_length": 95.15625190734863, "completions/min_length": 53.75, "epoch": 2.6120625465376026, "grad_norm": 1.466638290302285, "kl": 0.380859375, "learning_rate": 8.494326859189345e-07, "loss": -0.0010671764612197876, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.0876726396381855, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.19888615608215332, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1753, "train_speed(iter/s)": 0.029424 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.0, "completions/mean_length": 96.35416984558105, "completions/min_length": 44.25, "epoch": 2.6135517498138494, "grad_norm": 0.005988146091980825, "kl": 0.3310546875, "learning_rate": 8.492634933967748e-07, "loss": 0.0003309960011392832, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1754, "train_speed(iter/s)": 0.029424 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 194.5, "completions/mean_length": 94.68750190734863, "completions/min_length": 47.5, "epoch": 2.6150409530900967, "grad_norm": 0.6584088138696385, "kl": 0.31787109375, "learning_rate": 8.490942227378932e-07, "loss": 0.004201872739940882, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833507180214, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1755, "train_speed(iter/s)": 0.029426 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.75, "completions/mean_length": 94.97916984558105, "completions/min_length": 50.25, "epoch": 2.616530156366344, "grad_norm": 0.571011177650074, "kl": 0.337890625, "learning_rate": 8.489248739801582e-07, "loss": -0.010623326525092125, "memory(GiB)": 112.53, "reward": 1.6562500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500074505806, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1756, "train_speed(iter/s)": 0.029423 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 201.25, "completions/mean_length": 92.36458396911621, "completions/min_length": 35.5, "epoch": 2.618019359642591, "grad_norm": 0.006114714569840979, "kl": 0.333984375, "learning_rate": 8.487554471614566e-07, "loss": 0.0003337652888149023, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1757, "train_speed(iter/s)": 0.029423 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.75, "completions/mean_length": 94.82291793823242, "completions/min_length": 46.25, "epoch": 2.6195085629188384, "grad_norm": 0.8104403758615049, "kl": 0.30615234375, "learning_rate": 8.485859423196924e-07, "loss": -0.022202983498573303, "memory(GiB)": 112.53, "reward": 1.8541666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1758, "train_speed(iter/s)": 0.029416 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.0, "completions/mean_length": 92.98958396911621, "completions/min_length": 46.75, "epoch": 2.6209977661950856, "grad_norm": 0.006350376283039224, "kl": 0.34375, "learning_rate": 8.484163594927867e-07, "loss": 0.00034409062936902046, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1759, "train_speed(iter/s)": 0.029418 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.25, "completions/mean_length": 93.07291984558105, "completions/min_length": 47.75, "epoch": 2.622486969471333, "grad_norm": 0.006286588901474986, "kl": 0.32861328125, "learning_rate": 8.482466987186785e-07, "loss": 0.0003284193226136267, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1760, "train_speed(iter/s)": 0.029419 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.25, "completions/mean_length": 93.10416793823242, "completions/min_length": 47.0, "epoch": 2.62397617274758, "grad_norm": 0.7385635354937974, "kl": 0.3486328125, "learning_rate": 8.48076960035324e-07, "loss": -0.0002678588207345456, "memory(GiB)": 112.53, "reward": 1.760416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166865348816, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1761, "train_speed(iter/s)": 0.029419 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.25, "completions/mean_length": 92.03125381469727, "completions/min_length": 39.75, "epoch": 2.6254653760238273, "grad_norm": 0.008249680236723636, "kl": 0.3544921875, "learning_rate": 8.479071434806968e-07, "loss": 0.00035458389902487397, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1762, "train_speed(iter/s)": 0.029416 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.5, "completions/mean_length": 92.97916984558105, "completions/min_length": 46.25, "epoch": 2.6269545793000746, "grad_norm": 1.8899852076388042, "kl": 0.34765625, "learning_rate": 8.477372490927881e-07, "loss": -0.00044316481216810644, "memory(GiB)": 112.53, "reward": 1.4062500596046448, "reward_std": 0.11258216947317123, "rewards/CineAccuracyORM/mean": 0.4062500111758709, "rewards/CineAccuracyORM/std": 0.46318942308425903, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1763, "train_speed(iter/s)": 0.029416 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.0, "completions/mean_length": 95.45833587646484, "completions/min_length": 43.5, "epoch": 2.628443782576322, "grad_norm": 1.40424670485033, "kl": 0.333984375, "learning_rate": 8.475672769096067e-07, "loss": -0.013005364686250687, "memory(GiB)": 112.53, "reward": 1.8750000298023224, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.8750000149011612, "rewards/CineAccuracyORM/std": 0.21978919208049774, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1764, "train_speed(iter/s)": 0.029412 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.0, "completions/mean_length": 95.06250190734863, "completions/min_length": 47.5, "epoch": 2.6299329858525686, "grad_norm": 0.005580718320204705, "kl": 0.3369140625, "learning_rate": 8.47397226969178e-07, "loss": 0.0003369641490280628, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1765, "train_speed(iter/s)": 0.029409 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.25, "completions/mean_length": 89.71875190734863, "completions/min_length": 48.5, "epoch": 2.6314221891288163, "grad_norm": 1.189470593937395, "kl": 0.35205078125, "learning_rate": 8.472270993095458e-07, "loss": 0.005348077043890953, "memory(GiB)": 112.53, "reward": 1.8541666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1766, "train_speed(iter/s)": 0.02941 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.75, "completions/mean_length": 92.57291984558105, "completions/min_length": 47.0, "epoch": 2.632911392405063, "grad_norm": 0.005182550070805417, "kl": 0.32470703125, "learning_rate": 8.470568939687706e-07, "loss": 0.00032486856798641384, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1767, "train_speed(iter/s)": 0.029415 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 191.75, "completions/mean_length": 96.23958587646484, "completions/min_length": 44.25, "epoch": 2.6344005956813104, "grad_norm": 0.005217762253412702, "kl": 0.3349609375, "learning_rate": 8.468866109849305e-07, "loss": 0.00033475435338914394, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1768, "train_speed(iter/s)": 0.029411 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.5, "completions/mean_length": 93.37500381469727, "completions/min_length": 44.5, "epoch": 2.6358897989575576, "grad_norm": 0.009015316858856393, "kl": 0.34130859375, "learning_rate": 8.467162503961208e-07, "loss": 0.0003416063264012337, "memory(GiB)": 112.53, "reward": 1.5000000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1769, "train_speed(iter/s)": 0.029407 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 186.5, "completions/mean_length": 90.19791984558105, "completions/min_length": 47.25, "epoch": 2.637379002233805, "grad_norm": 0.0053497495156842, "kl": 0.33935546875, "learning_rate": 8.465458122404548e-07, "loss": 0.00033893701038323343, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1770, "train_speed(iter/s)": 0.029407 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.0, "completions/mean_length": 95.96875190734863, "completions/min_length": 46.25, "epoch": 2.638868205510052, "grad_norm": 0.7208694187415091, "kl": 0.36279296875, "learning_rate": 8.46375296556062e-07, "loss": -0.0018926807679235935, "memory(GiB)": 112.53, "reward": 1.635416716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.635416679084301, "rewards/CineAccuracyORM/std": 0.4456155598163605, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1771, "train_speed(iter/s)": 0.029408 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.75, "completions/mean_length": 91.05208587646484, "completions/min_length": 49.75, "epoch": 2.6403574087862993, "grad_norm": 0.006347703895172992, "kl": 0.3466796875, "learning_rate": 8.462047033810905e-07, "loss": 0.0003465873305685818, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1772, "train_speed(iter/s)": 0.029406 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.25, "completions/mean_length": 90.68750381469727, "completions/min_length": 49.0, "epoch": 2.6418466120625466, "grad_norm": 0.005664921583889366, "kl": 0.33251953125, "learning_rate": 8.46034032753705e-07, "loss": 0.000333171192323789, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1773, "train_speed(iter/s)": 0.029406 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.0, "completions/mean_length": 94.51041793823242, "completions/min_length": 48.25, "epoch": 2.643335815338794, "grad_norm": 1.3300125392013715, "kl": 0.330078125, "learning_rate": 8.458632847120875e-07, "loss": 0.003245358122512698, "memory(GiB)": 112.53, "reward": 1.5937500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500223517418, "rewards/CineAccuracyORM/std": 0.47723397612571716, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1774, "train_speed(iter/s)": 0.029398 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.75, "completions/mean_length": 94.46875190734863, "completions/min_length": 53.75, "epoch": 2.644825018615041, "grad_norm": 0.00558413892367644, "kl": 0.3154296875, "learning_rate": 8.456924592944376e-07, "loss": 0.0003155340673401952, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1775, "train_speed(iter/s)": 0.0294 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.75, "completions/mean_length": 90.12500190734863, "completions/min_length": 48.25, "epoch": 2.6463142218912883, "grad_norm": 1.2260251690581683, "kl": 0.33447265625, "learning_rate": 8.455215565389724e-07, "loss": 0.019702337682247162, "memory(GiB)": 112.53, "reward": 1.6562500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500074505806, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1776, "train_speed(iter/s)": 0.029392 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.0, "completions/mean_length": 94.09375381469727, "completions/min_length": 51.75, "epoch": 2.6478034251675355, "grad_norm": 0.005597158977191945, "kl": 0.31640625, "learning_rate": 8.453505764839254e-07, "loss": 0.0003160169580951333, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1777, "train_speed(iter/s)": 0.029393 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.5, "completions/mean_length": 101.19791793823242, "completions/min_length": 52.5, "epoch": 2.6492926284437823, "grad_norm": 1.0218814698856156, "kl": 0.33154296875, "learning_rate": 8.451795191675487e-07, "loss": 0.0019306911854073405, "memory(GiB)": 112.53, "reward": 1.7395834028720856, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.739583358168602, "rewards/CineAccuracyORM/std": 0.4121886007487774, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1778, "train_speed(iter/s)": 0.029395 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.0, "completions/mean_length": 95.73958587646484, "completions/min_length": 57.25, "epoch": 2.65078183172003, "grad_norm": 0.0068156665293265285, "kl": 0.3447265625, "learning_rate": 8.450083846281106e-07, "loss": 0.00034463085466995835, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1779, "train_speed(iter/s)": 0.029395 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.5, "completions/mean_length": 93.78125190734863, "completions/min_length": 44.0, "epoch": 2.652271034996277, "grad_norm": 0.006079909805992249, "kl": 0.3388671875, "learning_rate": 8.448371729038972e-07, "loss": 0.00033879256807267666, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1780, "train_speed(iter/s)": 0.029388 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.75, "completions/mean_length": 95.25000381469727, "completions/min_length": 52.0, "epoch": 2.653760238272524, "grad_norm": 0.004952916125974941, "kl": 0.341796875, "learning_rate": 8.446658840332115e-07, "loss": 0.0003417438128963113, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1781, "train_speed(iter/s)": 0.029384 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.0, "completions/mean_length": 91.69792175292969, "completions/min_length": 55.25, "epoch": 2.6552494415487713, "grad_norm": 0.006885481746345877, "kl": 0.353515625, "learning_rate": 8.444945180543744e-07, "loss": 0.00035380531335249543, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1782, "train_speed(iter/s)": 0.029383 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.75, "completions/mean_length": 95.34375190734863, "completions/min_length": 48.25, "epoch": 2.6567386448250185, "grad_norm": 0.665003542869276, "kl": 0.32958984375, "learning_rate": 8.443230750057233e-07, "loss": -0.006718365475535393, "memory(GiB)": 112.53, "reward": 1.5312500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.5312500149011612, "rewards/CineAccuracyORM/std": 0.4884019047021866, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1783, "train_speed(iter/s)": 0.029379 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.25, "completions/mean_length": 87.88541984558105, "completions/min_length": 46.5, "epoch": 2.6582278481012658, "grad_norm": 0.0057540023613285515, "kl": 0.36962890625, "learning_rate": 8.441515549256133e-07, "loss": 0.0003695063933264464, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1784, "train_speed(iter/s)": 0.029379 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.5, "completions/mean_length": 96.77083587646484, "completions/min_length": 47.75, "epoch": 2.659717051377513, "grad_norm": 4.88936284062851, "kl": 0.859375, "learning_rate": 8.439799578524167e-07, "loss": -0.005999981425702572, "memory(GiB)": 112.53, "reward": 1.822916716337204, "reward_std": 0.12624847888946533, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.3705996870994568, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1785, "train_speed(iter/s)": 0.029384 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 184.75, "completions/mean_length": 92.14583778381348, "completions/min_length": 52.25, "epoch": 2.6612062546537603, "grad_norm": 1.6036836351592798, "kl": 0.3466796875, "learning_rate": 8.438082838245229e-07, "loss": 0.002898682840168476, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1786, "train_speed(iter/s)": 0.029385 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 184.0, "completions/mean_length": 104.72916984558105, "completions/min_length": 53.5, "epoch": 2.6626954579300075, "grad_norm": 0.006681976469653189, "kl": 0.3232421875, "learning_rate": 8.436365328803384e-07, "loss": 0.0003231919836252928, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1787, "train_speed(iter/s)": 0.029389 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.25, "completions/mean_length": 89.42708587646484, "completions/min_length": 47.5, "epoch": 2.6641846612062547, "grad_norm": 0.006001959533616671, "kl": 0.3681640625, "learning_rate": 8.434647050582873e-07, "loss": 0.00036767119308933616, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1788, "train_speed(iter/s)": 0.029382 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 185.25, "completions/mean_length": 92.06250381469727, "completions/min_length": 48.25, "epoch": 2.665673864482502, "grad_norm": 0.006346370015190952, "kl": 0.34716796875, "learning_rate": 8.432928003968108e-07, "loss": 0.000346546177752316, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1789, "train_speed(iter/s)": 0.029382 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.25, "completions/mean_length": 101.30208778381348, "completions/min_length": 61.5, "epoch": 2.6671630677587492, "grad_norm": 0.8485108250288543, "kl": 0.3466796875, "learning_rate": 8.431208189343669e-07, "loss": -0.0068390085361897945, "memory(GiB)": 112.53, "reward": 1.6770833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833358168602, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1790, "train_speed(iter/s)": 0.029387 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.25, "completions/mean_length": 93.39583778381348, "completions/min_length": 52.5, "epoch": 2.668652271034996, "grad_norm": 0.9939421691611012, "kl": 0.34912109375, "learning_rate": 8.429487607094309e-07, "loss": -0.008874376304447651, "memory(GiB)": 112.53, "reward": 1.5937500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500149011612, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1791, "train_speed(iter/s)": 0.029388 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.0, "completions/mean_length": 88.23958778381348, "completions/min_length": 52.75, "epoch": 2.6701414743112437, "grad_norm": 0.005267381450559246, "kl": 0.3544921875, "learning_rate": 8.427766257604956e-07, "loss": 0.000354749005055055, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1792, "train_speed(iter/s)": 0.029393 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.75, "completions/mean_length": 97.66666793823242, "completions/min_length": 51.25, "epoch": 2.6716306775874905, "grad_norm": 0.9139788714466208, "kl": 0.330078125, "learning_rate": 8.426044141260711e-07, "loss": 0.0060138884000480175, "memory(GiB)": 112.53, "reward": 1.7604166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1793, "train_speed(iter/s)": 0.029389 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.0, "completions/mean_length": 98.45833587646484, "completions/min_length": 55.5, "epoch": 2.6731198808637378, "grad_norm": 2.006888598531332, "kl": 0.33984375, "learning_rate": 8.424321258446838e-07, "loss": -0.011385182850062847, "memory(GiB)": 112.53, "reward": 1.5729167461395264, "reward_std": 0.0876726359128952, "rewards/CineAccuracyORM/mean": 0.5729166865348816, "rewards/CineAccuracyORM/std": 0.47723397612571716, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1794, "train_speed(iter/s)": 0.02938 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.75, "completions/mean_length": 99.58333778381348, "completions/min_length": 52.0, "epoch": 2.674609084139985, "grad_norm": 0.005612187724026814, "kl": 0.3251953125, "learning_rate": 8.422597609548781e-07, "loss": 0.0003248430439271033, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1795, "train_speed(iter/s)": 0.02938 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.25, "completions/mean_length": 103.13541984558105, "completions/min_length": 57.75, "epoch": 2.6760982874162322, "grad_norm": 0.9053156807926908, "kl": 0.34375, "learning_rate": 8.420873194952152e-07, "loss": 0.006074403878301382, "memory(GiB)": 112.53, "reward": 1.6041667461395264, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6041666865348816, "rewards/CineAccuracyORM/std": 0.4870600998401642, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1796, "train_speed(iter/s)": 0.029376 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.0, "completions/mean_length": 94.58333587646484, "completions/min_length": 54.25, "epoch": 2.6775874906924795, "grad_norm": 0.005745313025289594, "kl": 0.345703125, "learning_rate": 8.419148015042733e-07, "loss": 0.00034557233448140323, "memory(GiB)": 112.53, "reward": 1.5000000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1797, "train_speed(iter/s)": 0.029381 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.75, "completions/mean_length": 94.15625381469727, "completions/min_length": 53.5, "epoch": 2.6790766939687267, "grad_norm": 0.005448318007454921, "kl": 0.345703125, "learning_rate": 8.41742207020648e-07, "loss": 0.00034603729727678, "memory(GiB)": 112.53, "reward": 1.5000000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1798, "train_speed(iter/s)": 0.029378 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.25, "completions/mean_length": 96.67708587646484, "completions/min_length": 49.25, "epoch": 2.680565897244974, "grad_norm": 0.9489669669567936, "kl": 0.3173828125, "learning_rate": 8.41569536082952e-07, "loss": 0.0023344785440713167, "memory(GiB)": 112.53, "reward": 1.8125000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1799, "train_speed(iter/s)": 0.029379 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.25, "completions/mean_length": 91.47917175292969, "completions/min_length": 47.5, "epoch": 2.682055100521221, "grad_norm": 0.005764969537558346, "kl": 0.3427734375, "learning_rate": 8.413967887298147e-07, "loss": 0.0003430900105740875, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1800, "train_speed(iter/s)": 0.029379 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.5, "completions/mean_length": 90.80208587646484, "completions/min_length": 45.25, "epoch": 2.6835443037974684, "grad_norm": 0.005904854533543554, "kl": 0.35107421875, "learning_rate": 8.41223964999883e-07, "loss": 0.000351838389178738, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1801, "train_speed(iter/s)": 0.029371 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.5, "completions/mean_length": 91.47916793823242, "completions/min_length": 50.25, "epoch": 2.6850335070737157, "grad_norm": 0.8495125741735103, "kl": 0.3408203125, "learning_rate": 8.41051064931821e-07, "loss": -0.007790758274495602, "memory(GiB)": 112.53, "reward": 1.510416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5104166865348816, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1802, "train_speed(iter/s)": 0.029369 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 186.0, "completions/mean_length": 96.42708587646484, "completions/min_length": 48.75, "epoch": 2.686522710349963, "grad_norm": 0.8894379996595989, "kl": 0.32861328125, "learning_rate": 8.408780885643093e-07, "loss": 0.008757101371884346, "memory(GiB)": 112.53, "reward": 1.5937500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500074505806, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1803, "train_speed(iter/s)": 0.029373 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.0, "completions/mean_length": 99.98958587646484, "completions/min_length": 50.75, "epoch": 2.6880119136262097, "grad_norm": 0.006379792167181343, "kl": 0.3271484375, "learning_rate": 8.407050359360462e-07, "loss": 0.00032784719951450825, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1804, "train_speed(iter/s)": 0.029377 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.5, "completions/mean_length": 94.71875190734863, "completions/min_length": 52.5, "epoch": 2.6895011169024574, "grad_norm": 0.9755402030399304, "kl": 0.35107421875, "learning_rate": 8.405319070857466e-07, "loss": -0.006838809233158827, "memory(GiB)": 112.53, "reward": 1.635416716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.635416679084301, "rewards/CineAccuracyORM/std": 0.34448449313640594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1805, "train_speed(iter/s)": 0.029373 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.5, "completions/mean_length": 99.58333587646484, "completions/min_length": 57.25, "epoch": 2.690990320178704, "grad_norm": 0.005465087214148387, "kl": 0.30322265625, "learning_rate": 8.403587020521428e-07, "loss": 0.0003031091473530978, "memory(GiB)": 112.53, "reward": 1.5000000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1806, "train_speed(iter/s)": 0.029372 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 179.25, "completions/mean_length": 94.18750381469727, "completions/min_length": 45.25, "epoch": 2.6924795234549515, "grad_norm": 0.005561304411184256, "kl": 0.345703125, "learning_rate": 8.401854208739837e-07, "loss": 0.0003456533304415643, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1807, "train_speed(iter/s)": 0.029368 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.0, "completions/mean_length": 93.05208396911621, "completions/min_length": 48.0, "epoch": 2.6939687267311987, "grad_norm": 0.547780726423348, "kl": 0.34814453125, "learning_rate": 8.400120635900359e-07, "loss": -0.00681645143777132, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.822916679084301, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1808, "train_speed(iter/s)": 0.029363 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.25, "completions/mean_length": 94.44791984558105, "completions/min_length": 45.75, "epoch": 2.695457930007446, "grad_norm": 0.005381555898844831, "kl": 0.34423828125, "learning_rate": 8.398386302390823e-07, "loss": 0.00034440422314219177, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1809, "train_speed(iter/s)": 0.029359 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.75, "completions/mean_length": 91.88541793823242, "completions/min_length": 52.25, "epoch": 2.696947133283693, "grad_norm": 0.005005286605251668, "kl": 0.34521484375, "learning_rate": 8.396651208599234e-07, "loss": 0.00034517107997089624, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1810, "train_speed(iter/s)": 0.029357 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 237.25, "completions/mean_length": 107.09375190734863, "completions/min_length": 53.75, "epoch": 2.6984363365599404, "grad_norm": 1.0239498650139351, "kl": 0.29833984375, "learning_rate": 8.394915354913763e-07, "loss": 0.0024936068803071976, "memory(GiB)": 112.53, "reward": 1.5104167461395264, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5104166818782687, "rewards/CineAccuracyORM/std": 0.4121886007487774, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1811, "train_speed(iter/s)": 0.029356 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.25, "completions/mean_length": 91.23958587646484, "completions/min_length": 49.0, "epoch": 2.6999255398361877, "grad_norm": 0.005593178902020373, "kl": 0.34130859375, "learning_rate": 8.393178741722753e-07, "loss": 0.00034115483867935836, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1812, "train_speed(iter/s)": 0.029361 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 194.0, "completions/mean_length": 102.18750381469727, "completions/min_length": 52.5, "epoch": 2.701414743112435, "grad_norm": 1.0816211359487187, "kl": 0.30224609375, "learning_rate": 8.391441369414718e-07, "loss": 0.0114724887534976, "memory(GiB)": 112.53, "reward": 1.7187500298023224, "reward_std": 0.07400632463395596, "rewards/CineAccuracyORM/mean": 0.7187500149011612, "rewards/CineAccuracyORM/std": 0.33919306844472885, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1813, "train_speed(iter/s)": 0.029366 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.25, "completions/mean_length": 95.50000381469727, "completions/min_length": 49.5, "epoch": 2.702903946388682, "grad_norm": 0.005495866588722626, "kl": 0.32177734375, "learning_rate": 8.389703238378338e-07, "loss": 0.00032227387418970466, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1814, "train_speed(iter/s)": 0.029371 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.5, "completions/mean_length": 100.77083778381348, "completions/min_length": 54.75, "epoch": 2.7043931496649294, "grad_norm": 0.9085053578638691, "kl": 0.31982421875, "learning_rate": 8.387964349002467e-07, "loss": -0.0026328829117119312, "memory(GiB)": 112.53, "reward": 1.5208333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.5208333414047956, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1815, "train_speed(iter/s)": 0.029376 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.25, "completions/mean_length": 103.68750381469727, "completions/min_length": 50.75, "epoch": 2.7058823529411766, "grad_norm": 1.2274606393238159, "kl": 0.32373046875, "learning_rate": 8.386224701676127e-07, "loss": -0.007982107810676098, "memory(GiB)": 112.53, "reward": 1.7083333730697632, "reward_std": 0.09750140458345413, "rewards/CineAccuracyORM/mean": 0.708333358168602, "rewards/CineAccuracyORM/std": 0.42095326259732246, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1816, "train_speed(iter/s)": 0.02938 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.75, "completions/mean_length": 93.25000190734863, "completions/min_length": 52.25, "epoch": 2.7073715562174234, "grad_norm": 0.7952621350444156, "kl": 0.3349609375, "learning_rate": 8.384484296788508e-07, "loss": 0.009559986181557178, "memory(GiB)": 112.53, "reward": 1.7083334028720856, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.708333358168602, "rewards/CineAccuracyORM/std": 0.45633094012737274, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1817, "train_speed(iter/s)": 0.029385 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 186.5, "completions/mean_length": 101.55208396911621, "completions/min_length": 49.75, "epoch": 2.708860759493671, "grad_norm": 1.0511822209111454, "kl": 0.3212890625, "learning_rate": 8.382743134728971e-07, "loss": 0.0021352656185626984, "memory(GiB)": 112.53, "reward": 1.9895833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9895833432674408, "rewards/CineAccuracyORM/std": 0.05103103443980217, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1818, "train_speed(iter/s)": 0.029377 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.0, "completions/mean_length": 94.39583778381348, "completions/min_length": 40.0, "epoch": 2.710349962769918, "grad_norm": 0.005085925975745913, "kl": 0.33935546875, "learning_rate": 8.381001215887048e-07, "loss": 0.00033831200562417507, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1819, "train_speed(iter/s)": 0.029377 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.0, "completions/mean_length": 95.93750190734863, "completions/min_length": 54.25, "epoch": 2.711839166046165, "grad_norm": 0.005216162839843387, "kl": 0.3388671875, "learning_rate": 8.379258540652436e-07, "loss": 0.00033882790012285113, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1820, "train_speed(iter/s)": 0.029378 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.25, "completions/mean_length": 96.96875190734863, "completions/min_length": 54.75, "epoch": 2.7133283693224124, "grad_norm": 1.4932055367873915, "kl": 0.3310546875, "learning_rate": 8.377515109415006e-07, "loss": -0.0017296632286161184, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1821, "train_speed(iter/s)": 0.029379 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.5, "completions/mean_length": 93.38541984558105, "completions/min_length": 51.0, "epoch": 2.7148175725986596, "grad_norm": 2.087847512973256, "kl": 0.3369140625, "learning_rate": 8.375770922564795e-07, "loss": -0.015513631515204906, "memory(GiB)": 112.53, "reward": 1.6875000298023224, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.6875000074505806, "rewards/CineAccuracyORM/std": 0.3712640330195427, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1822, "train_speed(iter/s)": 0.02938 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.75, "completions/mean_length": 99.85416793823242, "completions/min_length": 50.25, "epoch": 2.716306775874907, "grad_norm": 0.005629005310151142, "kl": 0.3271484375, "learning_rate": 8.37402598049201e-07, "loss": 0.00032766908407211304, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1823, "train_speed(iter/s)": 0.02938 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.25, "completions/mean_length": 95.51041984558105, "completions/min_length": 43.5, "epoch": 2.717795979151154, "grad_norm": 0.005933578954047186, "kl": 0.33447265625, "learning_rate": 8.372280283587028e-07, "loss": 0.00033429820905439556, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1824, "train_speed(iter/s)": 0.029381 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.5, "completions/mean_length": 105.33333778381348, "completions/min_length": 60.25, "epoch": 2.7192851824274014, "grad_norm": 0.5012879698999482, "kl": 0.31884765625, "learning_rate": 8.370533832240389e-07, "loss": 0.011647898703813553, "memory(GiB)": 112.53, "reward": 1.4270833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.4270833432674408, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1825, "train_speed(iter/s)": 0.029373 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.0, "completions/mean_length": 107.69791984558105, "completions/min_length": 58.5, "epoch": 2.7207743857036486, "grad_norm": 1.1242564927906213, "kl": 0.31201171875, "learning_rate": 8.368786626842813e-07, "loss": 0.008078186772763729, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1826, "train_speed(iter/s)": 0.029369 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.25, "completions/mean_length": 104.32291793823242, "completions/min_length": 55.25, "epoch": 2.722263588979896, "grad_norm": 0.7314601472913718, "kl": 0.3056640625, "learning_rate": 8.367038667785178e-07, "loss": -0.0007355426787398756, "memory(GiB)": 112.53, "reward": 1.5625000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.5625000223517418, "rewards/CineAccuracyORM/std": 0.4870600998401642, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1827, "train_speed(iter/s)": 0.029371 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.75, "completions/mean_length": 99.21875190734863, "completions/min_length": 51.0, "epoch": 2.723752792256143, "grad_norm": 0.005556498707720715, "kl": 0.3115234375, "learning_rate": 8.365289955458539e-07, "loss": 0.0003119081084150821, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1828, "train_speed(iter/s)": 0.029371 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.5, "completions/mean_length": 100.98958587646484, "completions/min_length": 51.25, "epoch": 2.7252419955323903, "grad_norm": 0.9542964256394011, "kl": 0.31298828125, "learning_rate": 8.363540490254111e-07, "loss": -0.003719592234119773, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1829, "train_speed(iter/s)": 0.029367 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.0, "completions/mean_length": 101.00000381469727, "completions/min_length": 48.25, "epoch": 2.726731198808637, "grad_norm": 0.923629615590711, "kl": 0.32958984375, "learning_rate": 8.361790272563282e-07, "loss": -0.011107566766440868, "memory(GiB)": 112.53, "reward": 1.7604166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.760416679084301, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1830, "train_speed(iter/s)": 0.029368 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.0, "completions/mean_length": 101.80208778381348, "completions/min_length": 51.75, "epoch": 2.728220402084885, "grad_norm": 0.8568089636904163, "kl": 0.330078125, "learning_rate": 8.360039302777611e-07, "loss": 0.00972682610154152, "memory(GiB)": 112.53, "reward": 1.6979167461395264, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6979166865348816, "rewards/CineAccuracyORM/std": 0.464870348572731, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1831, "train_speed(iter/s)": 0.029366 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.5, "completions/mean_length": 97.14583587646484, "completions/min_length": 53.0, "epoch": 2.7297096053611316, "grad_norm": 0.005204008593065828, "kl": 0.337890625, "learning_rate": 8.358287581288822e-07, "loss": 0.0003381508286111057, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1832, "train_speed(iter/s)": 0.029361 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.25, "completions/mean_length": 99.18750381469727, "completions/min_length": 57.25, "epoch": 2.731198808637379, "grad_norm": 2.6427384437781942, "kl": 0.3017578125, "learning_rate": 8.356535108488807e-07, "loss": 0.002732242923229933, "memory(GiB)": 112.53, "reward": 1.8541666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.12590253353118896, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1833, "train_speed(iter/s)": 0.029361 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.25, "completions/mean_length": 101.25000190734863, "completions/min_length": 56.75, "epoch": 2.732688011913626, "grad_norm": 0.8400753893907887, "kl": 0.3359375, "learning_rate": 8.354781884769624e-07, "loss": -0.003177151782438159, "memory(GiB)": 112.53, "reward": 1.5104166865348816, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.5104166697710752, "rewards/CineAccuracyORM/std": 0.31460215896368027, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1834, "train_speed(iter/s)": 0.029361 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.75, "completions/mean_length": 105.53125381469727, "completions/min_length": 49.25, "epoch": 2.7341772151898733, "grad_norm": 1.767845422471542, "kl": 0.32275390625, "learning_rate": 8.353027910523506e-07, "loss": -0.007413218729197979, "memory(GiB)": 112.53, "reward": 1.4062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.4062500074505806, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1835, "train_speed(iter/s)": 0.029357 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.5, "completions/mean_length": 107.34375190734863, "completions/min_length": 65.0, "epoch": 2.7356664184661206, "grad_norm": 0.005460073257509056, "kl": 0.291015625, "learning_rate": 8.351273186142847e-07, "loss": 0.0002911945921368897, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1836, "train_speed(iter/s)": 0.029362 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.75, "completions/mean_length": 97.62500381469727, "completions/min_length": 57.75, "epoch": 2.737155621742368, "grad_norm": 1.0569925977201844, "kl": 0.31982421875, "learning_rate": 8.349517712020211e-07, "loss": -0.008407309651374817, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.07715167850255966, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.316870853304863, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1837, "train_speed(iter/s)": 0.029362 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.75, "completions/mean_length": 98.93750190734863, "completions/min_length": 55.75, "epoch": 2.738644825018615, "grad_norm": 0.9722480567277489, "kl": 0.326171875, "learning_rate": 8.347761488548333e-07, "loss": 0.002817615633830428, "memory(GiB)": 112.53, "reward": 1.822916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1838, "train_speed(iter/s)": 0.029363 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.25, "completions/mean_length": 106.00000190734863, "completions/min_length": 59.25, "epoch": 2.7401340282948623, "grad_norm": 0.004993249213238681, "kl": 0.30517578125, "learning_rate": 8.34600451612011e-07, "loss": 0.0003052208630833775, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1839, "train_speed(iter/s)": 0.029364 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.75, "completions/mean_length": 100.96875190734863, "completions/min_length": 61.75, "epoch": 2.7416232315711095, "grad_norm": 0.005061791317750923, "kl": 0.30810546875, "learning_rate": 8.34424679512861e-07, "loss": 0.00030792789766564965, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1840, "train_speed(iter/s)": 0.029364 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 202.5, "completions/mean_length": 109.22916793823242, "completions/min_length": 50.25, "epoch": 2.743112434847357, "grad_norm": 0.0056674769226682225, "kl": 0.31494140625, "learning_rate": 8.342488325967068e-07, "loss": 0.0003147420648019761, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1841, "train_speed(iter/s)": 0.02936 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.25, "completions/mean_length": 104.61458778381348, "completions/min_length": 53.25, "epoch": 2.744601638123604, "grad_norm": 0.005390888175067609, "kl": 0.32373046875, "learning_rate": 8.340729109028887e-07, "loss": 0.0003238461213186383, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1842, "train_speed(iter/s)": 0.02935 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.75, "completions/mean_length": 95.72916984558105, "completions/min_length": 55.25, "epoch": 2.746090841399851, "grad_norm": 0.005135198168926445, "kl": 0.3427734375, "learning_rate": 8.338969144707634e-07, "loss": 0.0003420574066694826, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1843, "train_speed(iter/s)": 0.029342 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.0, "completions/mean_length": 108.23958396911621, "completions/min_length": 50.5, "epoch": 2.7475800446760985, "grad_norm": 0.8079715912869327, "kl": 0.298828125, "learning_rate": 8.33720843339705e-07, "loss": -0.006786522455513477, "memory(GiB)": 112.53, "reward": 1.7916666865348816, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.24807433784008026, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1844, "train_speed(iter/s)": 0.029342 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.5, "completions/mean_length": 107.65625381469727, "completions/min_length": 49.25, "epoch": 2.7490692479523453, "grad_norm": 0.6420117221127359, "kl": 0.30029296875, "learning_rate": 8.335446975491034e-07, "loss": -0.00797567330300808, "memory(GiB)": 112.53, "reward": 1.5312500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.5312500074505806, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1845, "train_speed(iter/s)": 0.029341 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.0, "completions/mean_length": 107.91666793823242, "completions/min_length": 58.0, "epoch": 2.7505584512285925, "grad_norm": 2.124596389029022, "kl": 0.31396484375, "learning_rate": 8.333684771383659e-07, "loss": -0.007208243943750858, "memory(GiB)": 112.53, "reward": 1.5937500298023224, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.5937500074505806, "rewards/CineAccuracyORM/std": 0.33468010276556015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1846, "train_speed(iter/s)": 0.029346 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.25, "completions/mean_length": 103.91666984558105, "completions/min_length": 60.25, "epoch": 2.75204765450484, "grad_norm": 1.551238935614722, "kl": 0.3251953125, "learning_rate": 8.331921821469163e-07, "loss": 0.012873142026364803, "memory(GiB)": 112.53, "reward": 1.729166716337204, "reward_std": 0.08311938121914864, "rewards/CineAccuracyORM/mean": 0.7291666865348816, "rewards/CineAccuracyORM/std": 0.34146176278591156, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1847, "train_speed(iter/s)": 0.029351 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.25, "completions/mean_length": 102.85416984558105, "completions/min_length": 49.75, "epoch": 2.753536857781087, "grad_norm": 0.6231448289579264, "kl": 0.32080078125, "learning_rate": 8.330158126141951e-07, "loss": -0.0062955217435956, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1848, "train_speed(iter/s)": 0.029349 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.0, "completions/mean_length": 99.72916984558105, "completions/min_length": 56.5, "epoch": 2.7550260610573343, "grad_norm": 0.8597057306720227, "kl": 0.34033203125, "learning_rate": 8.328393685796593e-07, "loss": 0.015666142106056213, "memory(GiB)": 112.53, "reward": 1.6354167461395264, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6354166865348816, "rewards/CineAccuracyORM/std": 0.4884019047021866, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1849, "train_speed(iter/s)": 0.029349 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.75, "completions/mean_length": 107.76041984558105, "completions/min_length": 48.5, "epoch": 2.7565152643335815, "grad_norm": 1.0834607697078515, "kl": 0.30859375, "learning_rate": 8.326628500827825e-07, "loss": 0.026514917612075806, "memory(GiB)": 112.53, "reward": 1.8541666865348816, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.22734662145376205, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1850, "train_speed(iter/s)": 0.029343 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.75, "completions/mean_length": 102.37500381469727, "completions/min_length": 60.75, "epoch": 2.7580044676098288, "grad_norm": 0.7119153063289558, "kl": 0.314453125, "learning_rate": 8.324862571630557e-07, "loss": 0.004796369932591915, "memory(GiB)": 112.53, "reward": 1.5937500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500074505806, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1851, "train_speed(iter/s)": 0.029339 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.75, "completions/mean_length": 105.91666793823242, "completions/min_length": 43.75, "epoch": 2.759493670886076, "grad_norm": 1.017821322809919, "kl": 0.3173828125, "learning_rate": 8.323095898599859e-07, "loss": 0.008160032331943512, "memory(GiB)": 112.53, "reward": 1.6458333730697632, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.6458333507180214, "rewards/CineAccuracyORM/std": 0.3401750475168228, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1852, "train_speed(iter/s)": 0.029331 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.5, "completions/mean_length": 100.57291793823242, "completions/min_length": 54.75, "epoch": 2.7609828741623232, "grad_norm": 1.3851703818475478, "kl": 0.31884765625, "learning_rate": 8.321328482130965e-07, "loss": 0.01459859311580658, "memory(GiB)": 112.53, "reward": 1.7812500596046448, "reward_std": 0.102054663002491, "rewards/CineAccuracyORM/mean": 0.7812500149011612, "rewards/CineAccuracyORM/std": 0.3182126581668854, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1853, "train_speed(iter/s)": 0.02933 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.5, "completions/mean_length": 100.93750190734863, "completions/min_length": 46.25, "epoch": 2.7624720774385705, "grad_norm": 0.005102868958469531, "kl": 0.314453125, "learning_rate": 8.31956032261928e-07, "loss": 0.00031368975760415196, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1854, "train_speed(iter/s)": 0.029331 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 181.0, "completions/mean_length": 106.44791984558105, "completions/min_length": 48.0, "epoch": 2.7639612807148177, "grad_norm": 1.0806325334302698, "kl": 0.31591796875, "learning_rate": 8.317791420460376e-07, "loss": -0.010986406356096268, "memory(GiB)": 112.53, "reward": 1.7812500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7812500149011612, "rewards/CineAccuracyORM/std": 0.34448449313640594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1855, "train_speed(iter/s)": 0.029332 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.25, "completions/mean_length": 104.57291984558105, "completions/min_length": 52.0, "epoch": 2.7654504839910645, "grad_norm": 1.0429043023773013, "kl": 0.3056640625, "learning_rate": 8.316021776049989e-07, "loss": -0.0096020782366395, "memory(GiB)": 112.53, "reward": 1.8958333432674408, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.12590253353118896, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1856, "train_speed(iter/s)": 0.029332 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 204.5, "completions/mean_length": 109.32291984558105, "completions/min_length": 53.5, "epoch": 2.766939687267312, "grad_norm": 0.8243315488979183, "kl": 0.29931640625, "learning_rate": 8.314251389784017e-07, "loss": -0.00040053314296528697, "memory(GiB)": 112.53, "reward": 1.635416716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.635416679084301, "rewards/CineAccuracyORM/std": 0.4456155598163605, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1857, "train_speed(iter/s)": 0.029332 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 201.5, "completions/mean_length": 107.26041984558105, "completions/min_length": 48.75, "epoch": 2.768428890543559, "grad_norm": 1.0870735929231041, "kl": 0.30712890625, "learning_rate": 8.312480262058535e-07, "loss": 0.015768637880682945, "memory(GiB)": 112.53, "reward": 1.729166716337204, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.7291666865348816, "rewards/CineAccuracyORM/std": 0.2874932996928692, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1858, "train_speed(iter/s)": 0.029331 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 179.0, "completions/mean_length": 103.17708587646484, "completions/min_length": 52.5, "epoch": 2.7699180938198062, "grad_norm": 0.005172651913887069, "kl": 0.31884765625, "learning_rate": 8.310708393269772e-07, "loss": 0.000318864214932546, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1859, "train_speed(iter/s)": 0.029336 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 206.75, "completions/mean_length": 103.18750190734863, "completions/min_length": 56.25, "epoch": 2.7714072970960535, "grad_norm": 1.508655700631042, "kl": 0.3623046875, "learning_rate": 8.30893578381413e-07, "loss": -0.0052461083978414536, "memory(GiB)": 112.53, "reward": 1.7083333730697632, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7083333507180214, "rewards/CineAccuracyORM/std": 0.33594508469104767, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1860, "train_speed(iter/s)": 0.029336 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 194.25, "completions/mean_length": 107.61458587646484, "completions/min_length": 58.5, "epoch": 2.7728965003723007, "grad_norm": 1.594400758335183, "kl": 0.2900390625, "learning_rate": 8.307162434088173e-07, "loss": -0.013805557042360306, "memory(GiB)": 112.53, "reward": 1.7812500596046448, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.7812500149011612, "rewards/CineAccuracyORM/std": 0.39581216871738434, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1861, "train_speed(iter/s)": 0.029341 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.25, "completions/mean_length": 106.07291984558105, "completions/min_length": 47.75, "epoch": 2.774385703648548, "grad_norm": 1.131320720335772, "kl": 0.31494140625, "learning_rate": 8.305388344488635e-07, "loss": 0.015676066279411316, "memory(GiB)": 112.53, "reward": 1.7395833432674408, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1862, "train_speed(iter/s)": 0.02934 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 185.0, "completions/mean_length": 99.17708587646484, "completions/min_length": 40.5, "epoch": 2.775874906924795, "grad_norm": 0.8189725374057938, "kl": 0.30419921875, "learning_rate": 8.303613515412411e-07, "loss": -0.008794615045189857, "memory(GiB)": 112.53, "reward": 1.822916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1863, "train_speed(iter/s)": 0.029345 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 248.25, "completions/mean_length": 105.27083587646484, "completions/min_length": 53.75, "epoch": 2.7773641102010425, "grad_norm": 1.1815738245226615, "kl": 0.31103515625, "learning_rate": 8.301837947256563e-07, "loss": 0.0019002966582775116, "memory(GiB)": 112.53, "reward": 1.6458334028720856, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6458333507180214, "rewards/CineAccuracyORM/std": 0.43174003064632416, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1864, "train_speed(iter/s)": 0.02934 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.25, "completions/mean_length": 106.56250381469727, "completions/min_length": 57.0, "epoch": 2.7788533134772897, "grad_norm": 1.0510485188059393, "kl": 0.322265625, "learning_rate": 8.30006164041832e-07, "loss": 0.009162789210677147, "memory(GiB)": 112.53, "reward": 1.7916666865348816, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.3284776881337166, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1865, "train_speed(iter/s)": 0.029333 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 184.75, "completions/mean_length": 109.26041984558105, "completions/min_length": 40.5, "epoch": 2.780342516753537, "grad_norm": 0.7042832547733108, "kl": 0.279296875, "learning_rate": 8.298284595295073e-07, "loss": -0.0026538530364632607, "memory(GiB)": 112.53, "reward": 1.6875000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000149011612, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1866, "train_speed(iter/s)": 0.029333 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.25, "completions/mean_length": 97.05208587646484, "completions/min_length": 50.75, "epoch": 2.781831720029784, "grad_norm": 0.004776018640375225, "kl": 0.30126953125, "learning_rate": 8.29650681228438e-07, "loss": 0.0003012599190697074, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1867, "train_speed(iter/s)": 0.029338 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.75, "completions/mean_length": 111.07291984558105, "completions/min_length": 57.5, "epoch": 2.7833209233060314, "grad_norm": 0.00545603994340864, "kl": 0.30078125, "learning_rate": 8.294728291783965e-07, "loss": 0.0003003121819347143, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1868, "train_speed(iter/s)": 0.029325 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.5, "completions/mean_length": 109.00000190734863, "completions/min_length": 52.25, "epoch": 2.7848101265822782, "grad_norm": 0.7579071530548069, "kl": 0.3056640625, "learning_rate": 8.292949034191716e-07, "loss": 0.00018344118143431842, "memory(GiB)": 112.53, "reward": 1.791666716337204, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7916666865348816, "rewards/CineAccuracyORM/std": 0.33594508469104767, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1869, "train_speed(iter/s)": 0.029322 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 248.0, "completions/mean_length": 106.15625190734863, "completions/min_length": 46.5, "epoch": 2.786299329858526, "grad_norm": 0.004917048269968738, "kl": 0.2900390625, "learning_rate": 8.291169039905687e-07, "loss": 0.0002895812795031816, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1870, "train_speed(iter/s)": 0.029317 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 223.25, "completions/mean_length": 104.89583587646484, "completions/min_length": 45.5, "epoch": 2.7877885331347727, "grad_norm": 0.9121866968972007, "kl": 0.2978515625, "learning_rate": 8.289388309324093e-07, "loss": 0.009178121574223042, "memory(GiB)": 112.53, "reward": 1.6354167461395264, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6354166865348816, "rewards/CineAccuracyORM/std": 0.4884019047021866, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1871, "train_speed(iter/s)": 0.029312 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.0, "completions/mean_length": 105.01041984558105, "completions/min_length": 63.0, "epoch": 2.78927773641102, "grad_norm": 1.1754205098774937, "kl": 0.291015625, "learning_rate": 8.287606842845318e-07, "loss": -0.0058870092034339905, "memory(GiB)": 112.53, "reward": 1.6979166865348816, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.697916679084301, "rewards/CineAccuracyORM/std": 0.3470437303185463, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1872, "train_speed(iter/s)": 0.029313 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.0, "completions/mean_length": 95.29167175292969, "completions/min_length": 50.0, "epoch": 2.790766939687267, "grad_norm": 0.004912201055925238, "kl": 0.31201171875, "learning_rate": 8.285824640867909e-07, "loss": 0.00031172329909168184, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1873, "train_speed(iter/s)": 0.029309 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.0, "completions/mean_length": 106.91666984558105, "completions/min_length": 60.5, "epoch": 2.7922561429635144, "grad_norm": 0.004435196361541829, "kl": 0.284912109375, "learning_rate": 8.284041703790577e-07, "loss": 0.00028473767451941967, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1874, "train_speed(iter/s)": 0.029305 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 244.75, "completions/mean_length": 105.67708396911621, "completions/min_length": 56.0, "epoch": 2.7937453462397617, "grad_norm": 0.005359405560017641, "kl": 0.3056640625, "learning_rate": 8.282258032012201e-07, "loss": 0.0003054143162444234, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1875, "train_speed(iter/s)": 0.029296 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.25, "completions/mean_length": 107.67708587646484, "completions/min_length": 51.0, "epoch": 2.795234549516009, "grad_norm": 1.0889499636978313, "kl": 0.291015625, "learning_rate": 8.280473625931816e-07, "loss": 0.037341199815273285, "memory(GiB)": 112.53, "reward": 1.6354166865348816, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.635416679084301, "rewards/CineAccuracyORM/std": 0.3699222281575203, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1876, "train_speed(iter/s)": 0.029297 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 186.5, "completions/mean_length": 102.87500381469727, "completions/min_length": 45.0, "epoch": 2.796723752792256, "grad_norm": 1.259292514590627, "kl": 0.3408203125, "learning_rate": 8.278688485948633e-07, "loss": -0.01647721603512764, "memory(GiB)": 112.53, "reward": 1.6458333730697632, "reward_std": 0.09750140458345413, "rewards/CineAccuracyORM/mean": 0.6458333432674408, "rewards/CineAccuracyORM/std": 0.4382359981536865, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1877, "train_speed(iter/s)": 0.029292 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 220.5, "completions/mean_length": 100.26041984558105, "completions/min_length": 51.75, "epoch": 2.7982129560685034, "grad_norm": 0.7160476399414091, "kl": 0.30126953125, "learning_rate": 8.276902612462015e-07, "loss": 0.019039403647184372, "memory(GiB)": 112.53, "reward": 1.6875000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000149011612, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1878, "train_speed(iter/s)": 0.029296 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.0, "completions/mean_length": 98.02083587646484, "completions/min_length": 51.75, "epoch": 2.7997021593447506, "grad_norm": 0.005650485278260281, "kl": 0.28564453125, "learning_rate": 8.275116005871501e-07, "loss": 0.0002850193122867495, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1879, "train_speed(iter/s)": 0.029297 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.75, "completions/mean_length": 103.05208587646484, "completions/min_length": 40.25, "epoch": 2.801191362620998, "grad_norm": 0.7945007029695631, "kl": 0.29443359375, "learning_rate": 8.273328666576782e-07, "loss": -0.0002330787683604285, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1880, "train_speed(iter/s)": 0.029294 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.25, "completions/mean_length": 104.34375381469727, "completions/min_length": 62.0, "epoch": 2.802680565897245, "grad_norm": 1.4926391322014287, "kl": 0.29736328125, "learning_rate": 8.271540594977724e-07, "loss": -0.0041743116453289986, "memory(GiB)": 112.53, "reward": 1.6562500596046448, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.6562500149011612, "rewards/CineAccuracyORM/std": 0.45113223791122437, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1881, "train_speed(iter/s)": 0.029283 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.25, "completions/mean_length": 106.83333587646484, "completions/min_length": 55.5, "epoch": 2.804169769173492, "grad_norm": 0.005234927206863596, "kl": 0.29296875, "learning_rate": 8.269751791474348e-07, "loss": 0.0002927909081336111, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1882, "train_speed(iter/s)": 0.029279 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 201.5, "completions/mean_length": 106.54166984558105, "completions/min_length": 55.0, "epoch": 2.8056589724497396, "grad_norm": 0.7878367845467543, "kl": 0.287109375, "learning_rate": 8.267962256466845e-07, "loss": 0.016878977417945862, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1883, "train_speed(iter/s)": 0.029279 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.0, "completions/mean_length": 115.05208587646484, "completions/min_length": 60.75, "epoch": 2.8071481757259864, "grad_norm": 0.9291333718076632, "kl": 0.28369140625, "learning_rate": 8.266171990355566e-07, "loss": -0.00584432203322649, "memory(GiB)": 112.53, "reward": 1.5937500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500074505806, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1884, "train_speed(iter/s)": 0.029274 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 184.0, "completions/mean_length": 99.36458587646484, "completions/min_length": 52.0, "epoch": 2.8086373790022336, "grad_norm": 4.075458440458132, "kl": 0.3173828125, "learning_rate": 8.264380993541027e-07, "loss": -0.02822837047278881, "memory(GiB)": 112.53, "reward": 1.5208334028720856, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.5208333507180214, "rewards/CineAccuracyORM/std": 0.4870600998401642, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1885, "train_speed(iter/s)": 0.029275 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.5, "completions/mean_length": 97.09375381469727, "completions/min_length": 41.5, "epoch": 2.810126582278481, "grad_norm": 0.006068930271058835, "kl": 0.30908203125, "learning_rate": 8.262589266423908e-07, "loss": 0.00030942013836465776, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1886, "train_speed(iter/s)": 0.029276 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 194.25, "completions/mean_length": 110.28125190734863, "completions/min_length": 50.0, "epoch": 2.811615785554728, "grad_norm": 0.00549268380167699, "kl": 0.2822265625, "learning_rate": 8.26079680940505e-07, "loss": 0.00028223171830177307, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1887, "train_speed(iter/s)": 0.029267 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.0, "completions/mean_length": 98.52083587646484, "completions/min_length": 47.5, "epoch": 2.8131049888309754, "grad_norm": 0.8879825578922286, "kl": 0.29443359375, "learning_rate": 8.259003622885461e-07, "loss": -0.011344294995069504, "memory(GiB)": 112.53, "reward": 1.8125000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1888, "train_speed(iter/s)": 0.029271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.0, "completions/mean_length": 100.68750190734863, "completions/min_length": 48.0, "epoch": 2.8145941921072226, "grad_norm": 1.576012101494235, "kl": 0.30859375, "learning_rate": 8.257209707266307e-07, "loss": -0.0005818711360916495, "memory(GiB)": 112.53, "reward": 1.9270833432674408, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.9270833432674408, "rewards/CineAccuracyORM/std": 0.16161249950528145, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1889, "train_speed(iter/s)": 0.029263 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.0, "completions/mean_length": 96.82291984558105, "completions/min_length": 47.75, "epoch": 2.81608339538347, "grad_norm": 0.005135685870565407, "kl": 0.28955078125, "learning_rate": 8.255415062948924e-07, "loss": 0.0002891987096518278, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1890, "train_speed(iter/s)": 0.02926 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.5, "completions/mean_length": 98.32291984558105, "completions/min_length": 42.25, "epoch": 2.817572598659717, "grad_norm": 1.714944524569624, "kl": 0.298828125, "learning_rate": 8.253619690334804e-07, "loss": 0.017855443060398102, "memory(GiB)": 112.53, "reward": 1.6458333730697632, "reward_std": 0.09750140458345413, "rewards/CineAccuracyORM/mean": 0.6458333507180214, "rewards/CineAccuracyORM/std": 0.4306785687804222, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1891, "train_speed(iter/s)": 0.029256 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.75, "completions/mean_length": 94.92708778381348, "completions/min_length": 44.25, "epoch": 2.8190618019359643, "grad_norm": 0.7854097627389102, "kl": 0.310546875, "learning_rate": 8.251823589825608e-07, "loss": 0.008287763223052025, "memory(GiB)": 112.53, "reward": 1.572916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5729166865348816, "rewards/CineAccuracyORM/std": 0.4121886007487774, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1892, "train_speed(iter/s)": 0.029252 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.25, "completions/mean_length": 96.30208587646484, "completions/min_length": 39.25, "epoch": 2.8205510052122116, "grad_norm": 0.005482399686158807, "kl": 0.30078125, "learning_rate": 8.250026761823153e-07, "loss": 0.00030070089269429445, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1893, "train_speed(iter/s)": 0.029256 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 179.25, "completions/mean_length": 98.79166793823242, "completions/min_length": 41.25, "epoch": 2.822040208488459, "grad_norm": 0.7756468625875286, "kl": 0.30419921875, "learning_rate": 8.248229206729426e-07, "loss": 0.005166094750165939, "memory(GiB)": 112.53, "reward": 1.5937500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500149011612, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1894, "train_speed(iter/s)": 0.029258 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.25, "completions/mean_length": 95.39583587646484, "completions/min_length": 28.75, "epoch": 2.8235294117647056, "grad_norm": 1.4538811891739125, "kl": 0.326171875, "learning_rate": 8.246430924946573e-07, "loss": -0.010217898525297642, "memory(GiB)": 112.53, "reward": 1.7916666865348816, "reward_std": 0.1034691073000431, "rewards/CineAccuracyORM/mean": 0.7916666865348816, "rewards/CineAccuracyORM/std": 0.26983824744820595, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1895, "train_speed(iter/s)": 0.029258 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.0, "completions/mean_length": 89.65625190734863, "completions/min_length": 38.5, "epoch": 2.8250186150409533, "grad_norm": 0.0057644277983200515, "kl": 0.3232421875, "learning_rate": 8.244631916876903e-07, "loss": 0.0003229337453376502, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1896, "train_speed(iter/s)": 0.029263 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.25, "completions/mean_length": 109.34375190734863, "completions/min_length": 54.5, "epoch": 2.8265078183172, "grad_norm": 1.3629433818130348, "kl": 0.281005859375, "learning_rate": 8.242832182922887e-07, "loss": -0.005556974094361067, "memory(GiB)": 112.53, "reward": 1.7291666865348816, "reward_std": 0.08311938121914864, "rewards/CineAccuracyORM/mean": 0.7291666716337204, "rewards/CineAccuracyORM/std": 0.35865580290555954, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1897, "train_speed(iter/s)": 0.029267 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 223.5, "completions/mean_length": 98.30208587646484, "completions/min_length": 52.0, "epoch": 2.8279970215934473, "grad_norm": 0.6059737375039866, "kl": 0.30078125, "learning_rate": 8.24103172348716e-07, "loss": -0.011970441788434982, "memory(GiB)": 112.53, "reward": 1.7291667461395264, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7291666865348816, "rewards/CineAccuracyORM/std": 0.43174003064632416, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1898, "train_speed(iter/s)": 0.029266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.5, "completions/mean_length": 109.60416984558105, "completions/min_length": 53.75, "epoch": 2.8294862248696946, "grad_norm": 0.005368275840433447, "kl": 0.29150390625, "learning_rate": 8.239230538972515e-07, "loss": 0.00029204325983300805, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1899, "train_speed(iter/s)": 0.029266 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.0, "completions/mean_length": 103.64583396911621, "completions/min_length": 46.25, "epoch": 2.830975428145942, "grad_norm": 1.3645245478367551, "kl": 0.29736328125, "learning_rate": 8.237428629781913e-07, "loss": -0.005565769504755735, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4154365845024586, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1900, "train_speed(iter/s)": 0.029263 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 390.0, "completions/mean_length": 107.37500190734863, "completions/min_length": 49.0, "epoch": 2.832464631422189, "grad_norm": 0.005396045300767163, "kl": 0.32861328125, "learning_rate": 8.235625996318474e-07, "loss": 0.00032855785684660077, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1901, "train_speed(iter/s)": 0.029255 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 186.25, "completions/mean_length": 99.47916793823242, "completions/min_length": 38.0, "epoch": 2.8339538346984363, "grad_norm": 0.6874163486691139, "kl": 0.2998046875, "learning_rate": 8.233822638985482e-07, "loss": -0.005076131783425808, "memory(GiB)": 112.53, "reward": 1.5312500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.5312500074505806, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1902, "train_speed(iter/s)": 0.029255 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.0, "completions/mean_length": 90.75000381469727, "completions/min_length": 45.5, "epoch": 2.8354430379746836, "grad_norm": 0.7421886241796367, "kl": 0.333984375, "learning_rate": 8.232018558186377e-07, "loss": -0.002366314409300685, "memory(GiB)": 112.53, "reward": 1.7083333730697632, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7083333432674408, "rewards/CineAccuracyORM/std": 0.36846019327640533, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1903, "train_speed(iter/s)": 0.02926 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.0, "completions/mean_length": 100.36458587646484, "completions/min_length": 42.75, "epoch": 2.836932241250931, "grad_norm": 1.1825029118106694, "kl": 0.2998046875, "learning_rate": 8.230213754324772e-07, "loss": 0.002113528549671173, "memory(GiB)": 112.53, "reward": 1.7083333730697632, "reward_std": 0.07715167850255966, "rewards/CineAccuracyORM/mean": 0.7083333432674408, "rewards/CineAccuracyORM/std": 0.36846019327640533, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1904, "train_speed(iter/s)": 0.02926 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.25, "completions/mean_length": 101.83333587646484, "completions/min_length": 48.25, "epoch": 2.838421444527178, "grad_norm": 0.904410860935575, "kl": 0.3046875, "learning_rate": 8.22840822780443e-07, "loss": 0.0035773171111941338, "memory(GiB)": 112.53, "reward": 1.510416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5104166865348816, "rewards/CineAccuracyORM/std": 0.47723397612571716, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1905, "train_speed(iter/s)": 0.029257 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.5, "completions/mean_length": 97.18750190734863, "completions/min_length": 46.0, "epoch": 2.8399106478034253, "grad_norm": 1.4238614507721297, "kl": 0.33740234375, "learning_rate": 8.226601979029281e-07, "loss": -0.0140294274315238, "memory(GiB)": 112.53, "reward": 1.6875000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000149011612, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1906, "train_speed(iter/s)": 0.029249 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.75, "completions/mean_length": 90.04166984558105, "completions/min_length": 37.0, "epoch": 2.8413998510796725, "grad_norm": 0.9111835452542749, "kl": 0.32666015625, "learning_rate": 8.224795008403419e-07, "loss": -0.0013687647879123688, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1907, "train_speed(iter/s)": 0.029247 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.0, "completions/mean_length": 96.28125381469727, "completions/min_length": 44.0, "epoch": 2.8428890543559193, "grad_norm": 0.6843296988296176, "kl": 0.32080078125, "learning_rate": 8.222987316331096e-07, "loss": 0.011186189018189907, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833507180214, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1908, "train_speed(iter/s)": 0.029243 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 184.75, "completions/mean_length": 95.67708587646484, "completions/min_length": 42.0, "epoch": 2.844378257632167, "grad_norm": 0.00665301948630377, "kl": 0.310546875, "learning_rate": 8.221178903216724e-07, "loss": 0.00031126453541219234, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1909, "train_speed(iter/s)": 0.029244 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.5, "completions/mean_length": 103.41666984558105, "completions/min_length": 47.25, "epoch": 2.845867460908414, "grad_norm": 0.9774178769270456, "kl": 0.31298828125, "learning_rate": 8.219369769464883e-07, "loss": -0.017665669322013855, "memory(GiB)": 112.53, "reward": 1.8437500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1910, "train_speed(iter/s)": 0.029244 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.0, "completions/mean_length": 96.77083396911621, "completions/min_length": 51.5, "epoch": 2.847356664184661, "grad_norm": 0.004669456700672381, "kl": 0.3076171875, "learning_rate": 8.217559915480305e-07, "loss": 0.0003068764344789088, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1911, "train_speed(iter/s)": 0.02924 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.25, "completions/mean_length": 99.87500190734863, "completions/min_length": 41.25, "epoch": 2.8488458674609083, "grad_norm": 0.6437778851766641, "kl": 0.298828125, "learning_rate": 8.21574934166789e-07, "loss": 0.011629532091319561, "memory(GiB)": 112.53, "reward": 1.8541666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1912, "train_speed(iter/s)": 0.02924 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.5, "completions/mean_length": 90.30208587646484, "completions/min_length": 44.25, "epoch": 2.8503350707371555, "grad_norm": 0.0050792535073444275, "kl": 0.3095703125, "learning_rate": 8.213938048432696e-07, "loss": 0.0003092510160058737, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1913, "train_speed(iter/s)": 0.02924 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.0, "completions/mean_length": 91.63541984558105, "completions/min_length": 38.75, "epoch": 2.8518242740134028, "grad_norm": 1.1405211629433682, "kl": 0.3349609375, "learning_rate": 8.212126036179944e-07, "loss": -0.0035784258507192135, "memory(GiB)": 112.53, "reward": 1.6458333432674408, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.6458333358168602, "rewards/CineAccuracyORM/std": 0.3284776881337166, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1914, "train_speed(iter/s)": 0.029237 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.25, "completions/mean_length": 105.61458587646484, "completions/min_length": 59.0, "epoch": 2.85331347728965, "grad_norm": 0.004761809878664775, "kl": 0.3017578125, "learning_rate": 8.210313305315017e-07, "loss": 0.0003017419658135623, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1915, "train_speed(iter/s)": 0.029237 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.5, "completions/mean_length": 96.14583396911621, "completions/min_length": 59.25, "epoch": 2.8548026805658973, "grad_norm": 1.1865964805707025, "kl": 0.32275390625, "learning_rate": 8.208499856243452e-07, "loss": 0.008173526264727116, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3600961044430733, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1916, "train_speed(iter/s)": 0.02924 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.75, "completions/mean_length": 92.42708587646484, "completions/min_length": 45.75, "epoch": 2.8562918838421445, "grad_norm": 0.8239815601164685, "kl": 0.3369140625, "learning_rate": 8.206685689370954e-07, "loss": -0.003399503882974386, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833507180214, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1917, "train_speed(iter/s)": 0.029232 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.25, "completions/mean_length": 105.13541793823242, "completions/min_length": 59.25, "epoch": 2.8577810871183917, "grad_norm": 0.004819101225298854, "kl": 0.302734375, "learning_rate": 8.204870805103386e-07, "loss": 0.00030298338970169425, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1918, "train_speed(iter/s)": 0.029228 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.0, "completions/mean_length": 97.44791793823242, "completions/min_length": 48.5, "epoch": 2.859270290394639, "grad_norm": 0.7496944824864361, "kl": 0.32470703125, "learning_rate": 8.203055203846769e-07, "loss": -0.00907074473798275, "memory(GiB)": 112.53, "reward": 1.729166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.729166679084301, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1919, "train_speed(iter/s)": 0.029224 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.0, "completions/mean_length": 96.95833587646484, "completions/min_length": 44.25, "epoch": 2.8607594936708862, "grad_norm": 0.9877245553729829, "kl": 0.31689453125, "learning_rate": 8.201238886007287e-07, "loss": -0.0023350180126726627, "memory(GiB)": 112.53, "reward": 1.8854166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.20484384894371033, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1920, "train_speed(iter/s)": 0.029221 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.75, "completions/mean_length": 89.89583587646484, "completions/min_length": 51.25, "epoch": 2.862248696947133, "grad_norm": 0.6235559693557534, "kl": 0.3330078125, "learning_rate": 8.199421851991288e-07, "loss": 0.0061483075842261314, "memory(GiB)": 112.53, "reward": 1.760416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166865348816, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1921, "train_speed(iter/s)": 0.029222 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 240.75, "completions/mean_length": 97.95833587646484, "completions/min_length": 46.5, "epoch": 2.8637379002233807, "grad_norm": 0.00478907625137012, "kl": 0.30126953125, "learning_rate": 8.19760410220527e-07, "loss": 0.00030061579309403896, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1922, "train_speed(iter/s)": 0.029216 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.75, "completions/mean_length": 94.03125381469727, "completions/min_length": 56.25, "epoch": 2.8652271034996275, "grad_norm": 0.7638169412158878, "kl": 0.32275390625, "learning_rate": 8.195785637055903e-07, "loss": -0.0037269596941769123, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1923, "train_speed(iter/s)": 0.029208 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 191.75, "completions/mean_length": 102.21875190734863, "completions/min_length": 41.5, "epoch": 2.8667163067758747, "grad_norm": 0.9304436493338517, "kl": 0.3125, "learning_rate": 8.193966456950007e-07, "loss": -0.011228734627366066, "memory(GiB)": 112.53, "reward": 1.5312500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.5312500149011612, "rewards/CineAccuracyORM/std": 0.4884019047021866, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1924, "train_speed(iter/s)": 0.029211 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.0, "completions/mean_length": 102.61458778381348, "completions/min_length": 57.75, "epoch": 2.868205510052122, "grad_norm": 0.6371539999837983, "kl": 0.33154296875, "learning_rate": 8.192146562294569e-07, "loss": 0.011635858565568924, "memory(GiB)": 112.53, "reward": 1.6250000298023224, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.6250000074505806, "rewards/CineAccuracyORM/std": 0.36846019327640533, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1925, "train_speed(iter/s)": 0.029216 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 184.25, "completions/mean_length": 104.79166984558105, "completions/min_length": 54.25, "epoch": 2.8696947133283692, "grad_norm": 0.005163900542837418, "kl": 0.31396484375, "learning_rate": 8.190325953496733e-07, "loss": 0.0003140502085443586, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1926, "train_speed(iter/s)": 0.029211 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 186.5, "completions/mean_length": 98.54166984558105, "completions/min_length": 53.75, "epoch": 2.8711839166046165, "grad_norm": 0.004522430375828998, "kl": 0.33544921875, "learning_rate": 8.188504630963802e-07, "loss": 0.00033564958721399307, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1927, "train_speed(iter/s)": 0.029208 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.5, "completions/mean_length": 99.97916793823242, "completions/min_length": 56.0, "epoch": 2.8726731198808637, "grad_norm": 1.1783480590836228, "kl": 0.33544921875, "learning_rate": 8.186682595103241e-07, "loss": 0.002983769401907921, "memory(GiB)": 112.53, "reward": 1.541666716337204, "reward_std": 0.07715167850255966, "rewards/CineAccuracyORM/mean": 0.5416666772216558, "rewards/CineAccuracyORM/std": 0.316870853304863, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1928, "train_speed(iter/s)": 0.0292 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 206.5, "completions/mean_length": 105.28125381469727, "completions/min_length": 53.5, "epoch": 2.874162323157111, "grad_norm": 0.8680544233594334, "kl": 0.3251953125, "learning_rate": 8.184859846322672e-07, "loss": 0.004680251702666283, "memory(GiB)": 112.53, "reward": 1.7083333730697632, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7083333507180214, "rewards/CineAccuracyORM/std": 0.33594508469104767, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1929, "train_speed(iter/s)": 0.029194 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.0, "completions/mean_length": 102.35416984558105, "completions/min_length": 53.75, "epoch": 2.875651526433358, "grad_norm": 0.651291291400017, "kl": 0.345703125, "learning_rate": 8.183036385029877e-07, "loss": 0.007044236641377211, "memory(GiB)": 112.53, "reward": 1.8437500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1930, "train_speed(iter/s)": 0.029189 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.0, "completions/mean_length": 106.84375381469727, "completions/min_length": 48.25, "epoch": 2.8771407297096054, "grad_norm": 0.5790084979089772, "kl": 0.29248046875, "learning_rate": 8.181212211632798e-07, "loss": 0.005079666152596474, "memory(GiB)": 112.53, "reward": 1.7604166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1931, "train_speed(iter/s)": 0.029184 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.75, "completions/mean_length": 111.78125190734863, "completions/min_length": 56.25, "epoch": 2.8786299329858527, "grad_norm": 0.8439242324743655, "kl": 0.287109375, "learning_rate": 8.179387326539539e-07, "loss": -0.0023277464788407087, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.07715167850255966, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1932, "train_speed(iter/s)": 0.029188 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.75, "completions/mean_length": 108.36458587646484, "completions/min_length": 55.75, "epoch": 2.8801191362621, "grad_norm": 0.6482195718612652, "kl": 0.30859375, "learning_rate": 8.17756173015836e-07, "loss": -0.016568094491958618, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1933, "train_speed(iter/s)": 0.029188 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 223.25, "completions/mean_length": 106.20833396911621, "completions/min_length": 54.5, "epoch": 2.8816083395383467, "grad_norm": 0.8582893346527494, "kl": 0.31640625, "learning_rate": 8.17573542289768e-07, "loss": 0.014970585703849792, "memory(GiB)": 112.53, "reward": 1.7291666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.729166679084301, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1934, "train_speed(iter/s)": 0.029187 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 204.5, "completions/mean_length": 106.32291984558105, "completions/min_length": 53.75, "epoch": 2.8830975428145944, "grad_norm": 0.6042795873416763, "kl": 0.30615234375, "learning_rate": 8.173908405166076e-07, "loss": 0.002023941371589899, "memory(GiB)": 112.53, "reward": 1.8854166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.20484384894371033, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1935, "train_speed(iter/s)": 0.029186 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 201.0, "completions/mean_length": 107.22916984558105, "completions/min_length": 57.75, "epoch": 2.884586746090841, "grad_norm": 1.3414636241917148, "kl": 0.30859375, "learning_rate": 8.172080677372289e-07, "loss": -0.0012788748135790229, "memory(GiB)": 112.53, "reward": 1.5520833730697632, "reward_std": 0.10205466859042645, "rewards/CineAccuracyORM/mean": 0.5520833432674408, "rewards/CineAccuracyORM/std": 0.4920940324664116, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1936, "train_speed(iter/s)": 0.029179 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.25, "completions/mean_length": 105.30208587646484, "completions/min_length": 49.5, "epoch": 2.8860759493670884, "grad_norm": 0.5379289012622059, "kl": 0.3203125, "learning_rate": 8.170252239925214e-07, "loss": -0.0037722494453191757, "memory(GiB)": 112.53, "reward": 1.8437500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1937, "train_speed(iter/s)": 0.029173 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.75, "completions/mean_length": 106.44791984558105, "completions/min_length": 60.5, "epoch": 2.8875651526433357, "grad_norm": 0.0050000356542341685, "kl": 0.29443359375, "learning_rate": 8.168423093233907e-07, "loss": 0.00029423864907585084, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1938, "train_speed(iter/s)": 0.029169 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.25, "completions/mean_length": 99.79166793823242, "completions/min_length": 48.0, "epoch": 2.889054355919583, "grad_norm": 0.8023384394319769, "kl": 0.31201171875, "learning_rate": 8.166593237707579e-07, "loss": -0.0034469650126993656, "memory(GiB)": 112.53, "reward": 1.6145833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6145833432674408, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1939, "train_speed(iter/s)": 0.029169 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 206.0, "completions/mean_length": 107.97916793823242, "completions/min_length": 54.25, "epoch": 2.89054355919583, "grad_norm": 0.983850276117544, "kl": 0.301025390625, "learning_rate": 8.164762673755609e-07, "loss": -0.003584241261705756, "memory(GiB)": 112.53, "reward": 1.5520833730697632, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.5520833432674408, "rewards/CineAccuracyORM/std": 0.34448449313640594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1940, "train_speed(iter/s)": 0.029168 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.0, "completions/mean_length": 109.79166984558105, "completions/min_length": 51.25, "epoch": 2.8920327624720774, "grad_norm": 0.7878012710244974, "kl": 0.2890625, "learning_rate": 8.16293140178752e-07, "loss": -0.011080957017838955, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1941, "train_speed(iter/s)": 0.029163 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 202.25, "completions/mean_length": 107.13542175292969, "completions/min_length": 53.0, "epoch": 2.8935219657483247, "grad_norm": 0.005810960069263407, "kl": 0.29833984375, "learning_rate": 8.161099422213009e-07, "loss": 0.00029829866252839565, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1942, "train_speed(iter/s)": 0.029167 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.0, "completions/mean_length": 105.92708587646484, "completions/min_length": 53.0, "epoch": 2.895011169024572, "grad_norm": 0.6470901198023603, "kl": 0.302734375, "learning_rate": 8.159266735441921e-07, "loss": 0.010120775550603867, "memory(GiB)": 112.53, "reward": 1.6562500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500149011612, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1943, "train_speed(iter/s)": 0.029171 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 296.5, "completions/mean_length": 122.81250381469727, "completions/min_length": 48.75, "epoch": 2.896500372300819, "grad_norm": 0.8097394159789325, "kl": 0.27880859375, "learning_rate": 8.15743334188426e-07, "loss": 0.002446591854095459, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833507180214, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1944, "train_speed(iter/s)": 0.029173 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.5, "completions/mean_length": 109.76042175292969, "completions/min_length": 55.25, "epoch": 2.8979895755770664, "grad_norm": 0.004526108371113771, "kl": 0.29541015625, "learning_rate": 8.155599241950191e-07, "loss": 0.0002951291389763355, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1945, "train_speed(iter/s)": 0.029169 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.75, "completions/mean_length": 106.13541984558105, "completions/min_length": 50.25, "epoch": 2.8994787788533136, "grad_norm": 0.632762092071323, "kl": 0.3134765625, "learning_rate": 8.153764436050039e-07, "loss": 0.01284624170511961, "memory(GiB)": 112.53, "reward": 1.5625000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.5625000149011612, "rewards/CineAccuracyORM/std": 0.43174003064632416, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1946, "train_speed(iter/s)": 0.029168 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.5, "completions/mean_length": 107.67708587646484, "completions/min_length": 48.75, "epoch": 2.9009679821295604, "grad_norm": 1.2649420427402065, "kl": 0.29638671875, "learning_rate": 8.15192892459428e-07, "loss": -0.0031643970869481564, "memory(GiB)": 112.53, "reward": 1.6354166865348816, "reward_std": 0.102054663002491, "rewards/CineAccuracyORM/mean": 0.635416679084301, "rewards/CineAccuracyORM/std": 0.4445540979504585, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1947, "train_speed(iter/s)": 0.029168 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 233.75, "completions/mean_length": 116.66666984558105, "completions/min_length": 46.75, "epoch": 2.902457185405808, "grad_norm": 0.9596240264245707, "kl": 0.28271484375, "learning_rate": 8.150092707993557e-07, "loss": -0.0023874491453170776, "memory(GiB)": 112.53, "reward": 1.3958333730697632, "reward_std": 0.0725918859243393, "rewards/CineAccuracyORM/mean": 0.3958333507180214, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1948, "train_speed(iter/s)": 0.029164 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 196.75, "completions/mean_length": 103.73958587646484, "completions/min_length": 49.5, "epoch": 2.903946388682055, "grad_norm": 0.005327605506547022, "kl": 0.306640625, "learning_rate": 8.14825578665866e-07, "loss": 0.0003063273907173425, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1949, "train_speed(iter/s)": 0.029164 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 202.75, "completions/mean_length": 106.31250190734863, "completions/min_length": 39.5, "epoch": 2.905435591958302, "grad_norm": 1.9473821170033863, "kl": 0.30517578125, "learning_rate": 8.146418161000547e-07, "loss": -0.023817777633666992, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.07715167850255966, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.3015497848391533, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1950, "train_speed(iter/s)": 0.02916 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 181.5, "completions/mean_length": 106.44791984558105, "completions/min_length": 51.0, "epoch": 2.9069247952345494, "grad_norm": 1.2263428420285014, "kl": 0.31494140625, "learning_rate": 8.144579831430325e-07, "loss": 0.017791619524359703, "memory(GiB)": 112.53, "reward": 1.7708333432674408, "reward_std": 0.11713542230427265, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.3282228857278824, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1951, "train_speed(iter/s)": 0.029161 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.0, "completions/mean_length": 107.06250381469727, "completions/min_length": 51.5, "epoch": 2.9084139985107966, "grad_norm": 0.5367973612245572, "kl": 0.29150390625, "learning_rate": 8.142740798359266e-07, "loss": 0.002825309056788683, "memory(GiB)": 112.53, "reward": 1.8541666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1952, "train_speed(iter/s)": 0.029157 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 194.5, "completions/mean_length": 100.50000381469727, "completions/min_length": 47.75, "epoch": 2.909903201787044, "grad_norm": 1.0221227463303841, "kl": 0.298828125, "learning_rate": 8.140901062198794e-07, "loss": 0.001123638590797782, "memory(GiB)": 112.53, "reward": 1.8750000298023224, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.8750000149011612, "rewards/CineAccuracyORM/std": 0.2155592292547226, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1953, "train_speed(iter/s)": 0.029154 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 254.0, "completions/mean_length": 114.97916984558105, "completions/min_length": 41.75, "epoch": 2.911392405063291, "grad_norm": 0.643118080320924, "kl": 0.30908203125, "learning_rate": 8.139060623360493e-07, "loss": 0.01434721052646637, "memory(GiB)": 112.53, "reward": 1.7187500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500074505806, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1954, "train_speed(iter/s)": 0.029153 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.75, "completions/mean_length": 96.85416793823242, "completions/min_length": 48.0, "epoch": 2.9128816083395384, "grad_norm": 1.0713463860759842, "kl": 0.2890625, "learning_rate": 8.137219482256101e-07, "loss": -0.0045610107481479645, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.08625819534063339, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1955, "train_speed(iter/s)": 0.029152 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.0, "completions/mean_length": 105.54166984558105, "completions/min_length": 38.25, "epoch": 2.9143708116157856, "grad_norm": 0.005440115495218942, "kl": 0.2958984375, "learning_rate": 8.135377639297519e-07, "loss": 0.0002961689024232328, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1956, "train_speed(iter/s)": 0.029148 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 194.25, "completions/mean_length": 109.26041793823242, "completions/min_length": 46.5, "epoch": 2.915860014892033, "grad_norm": 1.3081312898997846, "kl": 0.3037109375, "learning_rate": 8.1335350948968e-07, "loss": 0.007090953644365072, "memory(GiB)": 112.53, "reward": 1.4895833730697632, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.4895833507180214, "rewards/CineAccuracyORM/std": 0.49030808359384537, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1957, "train_speed(iter/s)": 0.029147 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 225.5, "completions/mean_length": 103.76041793823242, "completions/min_length": 48.75, "epoch": 2.91734921816828, "grad_norm": 1.3829646434760832, "kl": 0.3154296875, "learning_rate": 8.131691849466152e-07, "loss": 0.020947318524122238, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.7708333507180214, "rewards/CineAccuracyORM/std": 0.27512967213988304, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1958, "train_speed(iter/s)": 0.029142 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.5, "completions/mean_length": 105.40625190734863, "completions/min_length": 50.5, "epoch": 2.9188384214445273, "grad_norm": 0.9106495405661074, "kl": 0.31298828125, "learning_rate": 8.129847903417949e-07, "loss": -0.015158376656472683, "memory(GiB)": 112.53, "reward": 1.697916716337204, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.6979166865348816, "rewards/CineAccuracyORM/std": 0.46742958575487137, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1959, "train_speed(iter/s)": 0.029144 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.75, "completions/mean_length": 102.94791984558105, "completions/min_length": 45.75, "epoch": 2.920327624720774, "grad_norm": 0.6392001405160139, "kl": 0.3046875, "learning_rate": 8.128003257164711e-07, "loss": 0.013256451115012169, "memory(GiB)": 112.53, "reward": 1.8125000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1960, "train_speed(iter/s)": 0.029144 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.5, "completions/mean_length": 95.00000190734863, "completions/min_length": 48.5, "epoch": 2.921816827997022, "grad_norm": 1.2461804842392663, "kl": 0.33935546875, "learning_rate": 8.126157911119123e-07, "loss": -0.0038686320185661316, "memory(GiB)": 112.53, "reward": 1.822916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1961, "train_speed(iter/s)": 0.029145 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.25, "completions/mean_length": 105.23958587646484, "completions/min_length": 47.25, "epoch": 2.9233060312732686, "grad_norm": 0.005659269824229996, "kl": 0.3017578125, "learning_rate": 8.124311865694022e-07, "loss": 0.0003013137320522219, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1962, "train_speed(iter/s)": 0.029145 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 229.25, "completions/mean_length": 110.25000190734863, "completions/min_length": 47.0, "epoch": 2.924795234549516, "grad_norm": 0.00559609791771669, "kl": 0.29248046875, "learning_rate": 8.122465121302402e-07, "loss": 0.00029270444065332413, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1963, "train_speed(iter/s)": 0.029149 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.75, "completions/mean_length": 112.67708587646484, "completions/min_length": 44.75, "epoch": 2.926284437825763, "grad_norm": 0.42865594143515695, "kl": 0.30029296875, "learning_rate": 8.120617678357413e-07, "loss": -0.018236694857478142, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1964, "train_speed(iter/s)": 0.029144 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 243.0, "completions/mean_length": 101.16666984558105, "completions/min_length": 43.75, "epoch": 2.9277736411020103, "grad_norm": 0.0057989963729833645, "kl": 0.29345703125, "learning_rate": 8.118769537272365e-07, "loss": 0.0002938056131824851, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1965, "train_speed(iter/s)": 0.029148 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 231.0, "completions/mean_length": 107.42708587646484, "completions/min_length": 36.0, "epoch": 2.9292628443782576, "grad_norm": 1.2863861489005548, "kl": 0.30712890625, "learning_rate": 8.116920698460721e-07, "loss": -0.004027757793664932, "memory(GiB)": 112.53, "reward": 1.6458333432674408, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.6458333358168602, "rewards/CineAccuracyORM/std": 0.22734662145376205, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1966, "train_speed(iter/s)": 0.029144 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.5, "completions/mean_length": 108.04166984558105, "completions/min_length": 55.25, "epoch": 2.930752047654505, "grad_norm": 0.005068831512427066, "kl": 0.296875, "learning_rate": 8.115071162336097e-07, "loss": 0.00029685202753171325, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1967, "train_speed(iter/s)": 0.029148 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.25, "completions/mean_length": 102.22916793823242, "completions/min_length": 46.75, "epoch": 2.932241250930752, "grad_norm": 0.5343594074444086, "kl": 0.31103515625, "learning_rate": 8.113220929312274e-07, "loss": -0.014132924377918243, "memory(GiB)": 112.53, "reward": 1.6562500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1968, "train_speed(iter/s)": 0.029148 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.5, "completions/mean_length": 107.70833587646484, "completions/min_length": 46.25, "epoch": 2.9337304542069993, "grad_norm": 1.45145357606577, "kl": 0.306640625, "learning_rate": 8.111369999803179e-07, "loss": -0.01884869858622551, "memory(GiB)": 112.53, "reward": 1.8437500298023224, "reward_std": 0.06803862750530243, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1969, "train_speed(iter/s)": 0.029152 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.75, "completions/mean_length": 102.16666984558105, "completions/min_length": 48.0, "epoch": 2.9352196574832465, "grad_norm": 0.005931060552634405, "kl": 0.33154296875, "learning_rate": 8.109518374222901e-07, "loss": 0.0003314653586130589, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1970, "train_speed(iter/s)": 0.029145 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 196.0, "completions/mean_length": 106.39583587646484, "completions/min_length": 58.0, "epoch": 2.9367088607594938, "grad_norm": 1.4156706674935047, "kl": 0.294921875, "learning_rate": 8.107666052985683e-07, "loss": 0.026492660865187645, "memory(GiB)": 112.53, "reward": 1.6770834028720856, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.677083358168602, "rewards/CineAccuracyORM/std": 0.47723397612571716, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1971, "train_speed(iter/s)": 0.029145 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 244.25, "completions/mean_length": 102.92708587646484, "completions/min_length": 38.75, "epoch": 2.938198064035741, "grad_norm": 0.899137581218341, "kl": 0.31005859375, "learning_rate": 8.105813036505924e-07, "loss": -0.0009250999428331852, "memory(GiB)": 112.53, "reward": 1.7083334028720856, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.708333358168602, "rewards/CineAccuracyORM/std": 0.45633094012737274, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1972, "train_speed(iter/s)": 0.029148 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 237.75, "completions/mean_length": 120.39583778381348, "completions/min_length": 53.5, "epoch": 2.939687267311988, "grad_norm": 0.6495800485166928, "kl": 0.30224609375, "learning_rate": 8.103959325198177e-07, "loss": -0.01311182789504528, "memory(GiB)": 112.53, "reward": 1.7083333730697632, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7083333507180214, "rewards/CineAccuracyORM/std": 0.33594508469104767, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1973, "train_speed(iter/s)": 0.029152 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.75, "completions/mean_length": 100.39583587646484, "completions/min_length": 49.0, "epoch": 2.9411764705882355, "grad_norm": 0.005461520509981469, "kl": 0.3173828125, "learning_rate": 8.102104919477153e-07, "loss": 0.000317124358844012, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1974, "train_speed(iter/s)": 0.029153 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.5, "completions/mean_length": 115.27083587646484, "completions/min_length": 56.0, "epoch": 2.9426656738644823, "grad_norm": 1.1201978362924458, "kl": 0.3212890625, "learning_rate": 8.100249819757714e-07, "loss": 0.008418943732976913, "memory(GiB)": 112.53, "reward": 1.7083333730697632, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7083333432674408, "rewards/CineAccuracyORM/std": 0.36846019327640533, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1975, "train_speed(iter/s)": 0.029153 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.25, "completions/mean_length": 114.17708587646484, "completions/min_length": 54.25, "epoch": 2.94415487714073, "grad_norm": 1.0780683694117312, "kl": 0.28662109375, "learning_rate": 8.098394026454884e-07, "loss": 0.0013307365588843822, "memory(GiB)": 112.53, "reward": 1.7187500596046448, "reward_std": 0.07400632463395596, "rewards/CineAccuracyORM/mean": 0.7187500298023224, "rewards/CineAccuracyORM/std": 0.4520214945077896, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1976, "train_speed(iter/s)": 0.029152 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.0, "completions/mean_length": 112.64583778381348, "completions/min_length": 57.25, "epoch": 2.945644080416977, "grad_norm": 1.380269767311411, "kl": 0.3076171875, "learning_rate": 8.096537539983838e-07, "loss": 0.004007586278021336, "memory(GiB)": 112.53, "reward": 1.635416716337204, "reward_std": 0.102054663002491, "rewards/CineAccuracyORM/mean": 0.635416679084301, "rewards/CineAccuracyORM/std": 0.4456155598163605, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1977, "train_speed(iter/s)": 0.029146 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.25, "completions/mean_length": 105.55208587646484, "completions/min_length": 50.25, "epoch": 2.947133283693224, "grad_norm": 0.005317320792814079, "kl": 0.3125, "learning_rate": 8.094680360759904e-07, "loss": 0.00031328987097367644, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1978, "train_speed(iter/s)": 0.02915 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.5, "completions/mean_length": 106.85416984558105, "completions/min_length": 51.0, "epoch": 2.9486224869694713, "grad_norm": 0.005617764393337548, "kl": 0.3134765625, "learning_rate": 8.092822489198568e-07, "loss": 0.00031400410807691514, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1979, "train_speed(iter/s)": 0.029148 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.25, "completions/mean_length": 120.55208587646484, "completions/min_length": 66.5, "epoch": 2.9501116902457185, "grad_norm": 0.483782818352484, "kl": 0.28466796875, "learning_rate": 8.090963925715473e-07, "loss": -0.007242165505886078, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833507180214, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1980, "train_speed(iter/s)": 0.029141 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.75, "completions/mean_length": 117.90625381469727, "completions/min_length": 60.0, "epoch": 2.9516008935219658, "grad_norm": 0.0052309589359032, "kl": 0.29248046875, "learning_rate": 8.08910467072641e-07, "loss": 0.00029266218189150095, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1981, "train_speed(iter/s)": 0.029139 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.0, "completions/mean_length": 103.84375381469727, "completions/min_length": 61.75, "epoch": 2.953090096798213, "grad_norm": 0.006033824685682136, "kl": 0.33056640625, "learning_rate": 8.087244724647332e-07, "loss": 0.00032977404771372676, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1982, "train_speed(iter/s)": 0.029144 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.5, "completions/mean_length": 110.87500190734863, "completions/min_length": 50.75, "epoch": 2.9545793000744602, "grad_norm": 1.0262066502177838, "kl": 0.3046875, "learning_rate": 8.085384087894342e-07, "loss": 0.004564645234495401, "memory(GiB)": 112.53, "reward": 1.7291667461395264, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7291666865348816, "rewards/CineAccuracyORM/std": 0.43174003064632416, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1983, "train_speed(iter/s)": 0.029147 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.5, "completions/mean_length": 113.23958587646484, "completions/min_length": 52.75, "epoch": 2.9560685033507075, "grad_norm": 0.9496967375257357, "kl": 0.32177734375, "learning_rate": 8.083522760883698e-07, "loss": -0.0015919780125841498, "memory(GiB)": 112.53, "reward": 1.6562500596046448, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.6562500149011612, "rewards/CineAccuracyORM/std": 0.45113223791122437, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1984, "train_speed(iter/s)": 0.029147 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 194.0, "completions/mean_length": 111.15625190734863, "completions/min_length": 62.75, "epoch": 2.9575577066269547, "grad_norm": 0.005814492340214171, "kl": 0.302978515625, "learning_rate": 8.081660744031818e-07, "loss": 0.0003035855188500136, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1985, "train_speed(iter/s)": 0.029143 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.0, "completions/mean_length": 120.17708587646484, "completions/min_length": 72.5, "epoch": 2.9590469099032015, "grad_norm": 0.7823363645029963, "kl": 0.28466796875, "learning_rate": 8.079798037755266e-07, "loss": -0.0035483974497765303, "memory(GiB)": 112.53, "reward": 1.8437500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1986, "train_speed(iter/s)": 0.029137 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.25, "completions/mean_length": 112.75000381469727, "completions/min_length": 57.25, "epoch": 2.960536113179449, "grad_norm": 0.9957775172300999, "kl": 0.3076171875, "learning_rate": 8.077934642470763e-07, "loss": 0.000997394323348999, "memory(GiB)": 112.53, "reward": 1.8125000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000074505806, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1987, "train_speed(iter/s)": 0.029133 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.0, "completions/mean_length": 113.52083778381348, "completions/min_length": 58.0, "epoch": 2.962025316455696, "grad_norm": 1.1524326203715285, "kl": 0.2919921875, "learning_rate": 8.076070558595187e-07, "loss": 0.00373933557420969, "memory(GiB)": 112.53, "reward": 1.8750000298023224, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.8750000149011612, "rewards/CineAccuracyORM/std": 0.2155592292547226, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1988, "train_speed(iter/s)": 0.029137 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 196.25, "completions/mean_length": 111.44791984558105, "completions/min_length": 60.0, "epoch": 2.9635145197319437, "grad_norm": 0.6658395428155558, "kl": 0.31298828125, "learning_rate": 8.074205786545567e-07, "loss": -0.010053225792944431, "memory(GiB)": 112.53, "reward": 1.822916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1989, "train_speed(iter/s)": 0.029141 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.5, "completions/mean_length": 109.41666984558105, "completions/min_length": 51.75, "epoch": 2.9650037230081905, "grad_norm": 0.005982131024932991, "kl": 0.3017578125, "learning_rate": 8.072340326739088e-07, "loss": 0.00030196463922038674, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1990, "train_speed(iter/s)": 0.029145 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.5, "completions/mean_length": 108.44791984558105, "completions/min_length": 60.5, "epoch": 2.9664929262844377, "grad_norm": 0.6580423372137343, "kl": 0.30419921875, "learning_rate": 8.070474179593088e-07, "loss": 0.00855710357427597, "memory(GiB)": 112.53, "reward": 1.354166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.354166679084301, "rewards/CineAccuracyORM/std": 0.4870600998401642, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1991, "train_speed(iter/s)": 0.029145 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 231.5, "completions/mean_length": 113.78125381469727, "completions/min_length": 53.0, "epoch": 2.967982129560685, "grad_norm": 0.6082267845805441, "kl": 0.30712890625, "learning_rate": 8.068607345525059e-07, "loss": -0.0018790930043905973, "memory(GiB)": 112.53, "reward": 1.9479166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.9479166716337204, "rewards/CineAccuracyORM/std": 0.1037127822637558, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1992, "train_speed(iter/s)": 0.029145 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.25, "completions/mean_length": 108.80208587646484, "completions/min_length": 58.75, "epoch": 2.969471332836932, "grad_norm": 0.7719751595235883, "kl": 0.31884765625, "learning_rate": 8.066739824952644e-07, "loss": 0.007098143920302391, "memory(GiB)": 112.53, "reward": 1.697916716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.697916679084301, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1993, "train_speed(iter/s)": 0.029137 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.5, "completions/mean_length": 102.59375381469727, "completions/min_length": 57.5, "epoch": 2.9709605361131795, "grad_norm": 0.636164438077748, "kl": 0.33642578125, "learning_rate": 8.064871618293645e-07, "loss": 0.0027459044940769672, "memory(GiB)": 112.53, "reward": 1.6770834028720856, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.677083358168602, "rewards/CineAccuracyORM/std": 0.47723397612571716, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1994, "train_speed(iter/s)": 0.029133 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 233.0, "completions/mean_length": 109.02083587646484, "completions/min_length": 52.0, "epoch": 2.9724497393894267, "grad_norm": 1.4804030335633518, "kl": 0.30322265625, "learning_rate": 8.063002725966014e-07, "loss": 0.017284825444221497, "memory(GiB)": 112.53, "reward": 1.7083333730697632, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7083333432674408, "rewards/CineAccuracyORM/std": 0.36846019327640533, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1995, "train_speed(iter/s)": 0.029128 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.25, "completions/mean_length": 116.14583778381348, "completions/min_length": 65.0, "epoch": 2.973938942665674, "grad_norm": 0.005312504429322285, "kl": 0.29345703125, "learning_rate": 8.061133148387855e-07, "loss": 0.00029338261811062694, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1996, "train_speed(iter/s)": 0.029124 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.25, "completions/mean_length": 100.43750190734863, "completions/min_length": 58.0, "epoch": 2.975428145941921, "grad_norm": 0.005732849848836057, "kl": 0.310546875, "learning_rate": 8.059262885977429e-07, "loss": 0.0003104600473307073, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1997, "train_speed(iter/s)": 0.029129 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.25, "completions/mean_length": 107.09375381469727, "completions/min_length": 68.25, "epoch": 2.9769173492181684, "grad_norm": 0.005161498705988699, "kl": 0.31103515625, "learning_rate": 8.057391939153149e-07, "loss": 0.0003113970742560923, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1998, "train_speed(iter/s)": 0.02913 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 196.0, "completions/mean_length": 112.87500381469727, "completions/min_length": 68.5, "epoch": 2.978406552494415, "grad_norm": 0.7772671158675961, "kl": 0.28955078125, "learning_rate": 8.055520308333576e-07, "loss": 0.0056741125881671906, "memory(GiB)": 112.53, "reward": 1.7604166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 1999, "train_speed(iter/s)": 0.029134 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.0, "completions/mean_length": 108.11458587646484, "completions/min_length": 63.5, "epoch": 2.979895755770663, "grad_norm": 1.7848013057304106, "kl": 0.31298828125, "learning_rate": 8.053647993937435e-07, "loss": 0.008169908076524734, "memory(GiB)": 112.53, "reward": 1.5208333730697632, "reward_std": 0.09750140458345413, "rewards/CineAccuracyORM/mean": 0.5208333488553762, "rewards/CineAccuracyORM/std": 0.4306785687804222, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2000, "train_speed(iter/s)": 0.029132 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 196.0, "completions/mean_length": 117.43750190734863, "completions/min_length": 58.25, "epoch": 2.9813849590469097, "grad_norm": 0.9768405833512541, "kl": 0.32275390625, "learning_rate": 8.051774996383593e-07, "loss": 0.0027400467079132795, "memory(GiB)": 112.53, "reward": 1.4062500298023224, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.4062500074505806, "rewards/CineAccuracyORM/std": 0.3307463824748993, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2001, "train_speed(iter/s)": 0.02911 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 204.25, "completions/mean_length": 109.63541984558105, "completions/min_length": 52.75, "epoch": 2.9828741623231574, "grad_norm": 0.006944440779061508, "kl": 0.3076171875, "learning_rate": 8.049901316091076e-07, "loss": 0.00030745367985218763, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2002, "train_speed(iter/s)": 0.029111 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.5, "completions/mean_length": 122.19791984558105, "completions/min_length": 60.75, "epoch": 2.984363365599404, "grad_norm": 0.005339688329423901, "kl": 0.28759765625, "learning_rate": 8.048026953479061e-07, "loss": 0.0002870617900043726, "memory(GiB)": 112.53, "reward": 1.3333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.3333333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2003, "train_speed(iter/s)": 0.029107 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 196.0, "completions/mean_length": 106.12500381469727, "completions/min_length": 59.5, "epoch": 2.9858525688756514, "grad_norm": 0.005807542174361542, "kl": 0.31689453125, "learning_rate": 8.046151908966876e-07, "loss": 0.00031743996078148484, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2004, "train_speed(iter/s)": 0.029111 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.75, "completions/mean_length": 114.72916984558105, "completions/min_length": 51.75, "epoch": 2.9873417721518987, "grad_norm": 0.005467430936423643, "kl": 0.3125, "learning_rate": 8.044276182974006e-07, "loss": 0.00031316536478698254, "memory(GiB)": 112.53, "reward": 1.416666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.416666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2005, "train_speed(iter/s)": 0.029107 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 220.25, "completions/mean_length": 113.36458587646484, "completions/min_length": 63.5, "epoch": 2.988830975428146, "grad_norm": 0.005134360821439866, "kl": 0.2998046875, "learning_rate": 8.042399775920083e-07, "loss": 0.00030048764892853796, "memory(GiB)": 112.53, "reward": 1.5000000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000074505806, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2006, "train_speed(iter/s)": 0.029111 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.0, "completions/mean_length": 103.26041984558105, "completions/min_length": 57.0, "epoch": 2.990320178704393, "grad_norm": 1.5898118910830532, "kl": 0.318359375, "learning_rate": 8.040522688224896e-07, "loss": -7.197099330369383e-05, "memory(GiB)": 112.53, "reward": 1.6875000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000149011612, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2007, "train_speed(iter/s)": 0.029116 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.75, "completions/mean_length": 102.06250190734863, "completions/min_length": 54.75, "epoch": 2.9918093819806404, "grad_norm": 0.004732670840364358, "kl": 0.32861328125, "learning_rate": 8.038644920308382e-07, "loss": 0.00032878632191568613, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2008, "train_speed(iter/s)": 0.029118 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 186.25, "completions/mean_length": 105.29166984558105, "completions/min_length": 59.5, "epoch": 2.9932985852568876, "grad_norm": 1.617646067295349, "kl": 0.31884765625, "learning_rate": 8.036766472590636e-07, "loss": -0.0045500523410737514, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.802083358168602, "rewards/CineAccuracyORM/std": 0.2973194234073162, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2009, "train_speed(iter/s)": 0.029121 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 191.5, "completions/mean_length": 106.20833778381348, "completions/min_length": 54.75, "epoch": 2.994787788533135, "grad_norm": 1.6877293183039281, "kl": 0.32861328125, "learning_rate": 8.034887345491899e-07, "loss": -0.02145158313214779, "memory(GiB)": 112.53, "reward": 1.6562500596046448, "reward_std": 0.12696419283747673, "rewards/CineAccuracyORM/mean": 0.6562500149011612, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2010, "train_speed(iter/s)": 0.029116 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.75, "completions/mean_length": 104.32291793823242, "completions/min_length": 58.25, "epoch": 2.996276991809382, "grad_norm": 0.006771811051608053, "kl": 0.3251953125, "learning_rate": 8.033007539432568e-07, "loss": 0.0003249369328841567, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2011, "train_speed(iter/s)": 0.029118 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.5, "completions/mean_length": 116.58333587646484, "completions/min_length": 53.5, "epoch": 2.997766195085629, "grad_norm": 1.008176909325378, "kl": 0.3017578125, "learning_rate": 8.03112705483319e-07, "loss": -0.004007589537650347, "memory(GiB)": 112.53, "reward": 1.8125000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2012, "train_speed(iter/s)": 0.029113 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.25, "completions/mean_length": 112.59375190734863, "completions/min_length": 63.0, "epoch": 2.9992553983618766, "grad_norm": 0.005486371942558927, "kl": 0.30078125, "learning_rate": 8.029245892114462e-07, "loss": 0.0003002818557433784, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2013, "train_speed(iter/s)": 0.029105 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 194.25, "completions/mean_length": 108.90625190734863, "completions/min_length": 59.5, "epoch": 3.0014892032762472, "grad_norm": 0.004847468083102845, "kl": 0.30029296875, "learning_rate": 8.027364051697241e-07, "loss": 0.00030062624136917293, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2014, "train_speed(iter/s)": 0.029103 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.0, "completions/mean_length": 103.77083587646484, "completions/min_length": 59.75, "epoch": 3.0029784065524945, "grad_norm": 0.004413073558491872, "kl": 0.3056640625, "learning_rate": 8.025481534002523e-07, "loss": 0.00030609878012910485, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2015, "train_speed(iter/s)": 0.029099 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.5, "completions/mean_length": 106.01041984558105, "completions/min_length": 52.25, "epoch": 3.0044676098287417, "grad_norm": 1.812964661282398, "kl": 0.31591796875, "learning_rate": 8.023598339451465e-07, "loss": 0.0017301262123510242, "memory(GiB)": 112.53, "reward": 1.7291666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7291666716337204, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2016, "train_speed(iter/s)": 0.029103 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 202.75, "completions/mean_length": 110.43750190734863, "completions/min_length": 59.25, "epoch": 3.005956813104989, "grad_norm": 0.004813457310472921, "kl": 0.30517578125, "learning_rate": 8.021714468465372e-07, "loss": 0.00030524012981913984, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2017, "train_speed(iter/s)": 0.029099 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 235.5, "completions/mean_length": 116.62500190734863, "completions/min_length": 65.0, "epoch": 3.007446016381236, "grad_norm": 0.005392096354923977, "kl": 0.29345703125, "learning_rate": 8.019829921465702e-07, "loss": 0.00029369263211265206, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2018, "train_speed(iter/s)": 0.029102 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 208.75, "completions/mean_length": 109.40625190734863, "completions/min_length": 53.5, "epoch": 3.0089352196574835, "grad_norm": 0.00525663510122937, "kl": 0.32275390625, "learning_rate": 8.017944698874062e-07, "loss": 0.0003229803405702114, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2019, "train_speed(iter/s)": 0.029102 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 204.5, "completions/mean_length": 106.69791984558105, "completions/min_length": 51.75, "epoch": 3.0104244229337302, "grad_norm": 0.004759573799347788, "kl": 0.30322265625, "learning_rate": 8.016058801112213e-07, "loss": 0.0003029113868251443, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2020, "train_speed(iter/s)": 0.029102 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.0, "completions/mean_length": 98.04166984558105, "completions/min_length": 51.25, "epoch": 3.0119136262099775, "grad_norm": 0.004612915708860259, "kl": 0.34033203125, "learning_rate": 8.014172228602063e-07, "loss": 0.00034105111262761056, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2021, "train_speed(iter/s)": 0.029099 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.75, "completions/mean_length": 105.61458587646484, "completions/min_length": 51.25, "epoch": 3.0134028294862247, "grad_norm": 0.004766811100760693, "kl": 0.32080078125, "learning_rate": 8.012284981765674e-07, "loss": 0.00032062054378911853, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2022, "train_speed(iter/s)": 0.0291 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.25, "completions/mean_length": 113.78125381469727, "completions/min_length": 62.25, "epoch": 3.014892032762472, "grad_norm": 0.7369266683324479, "kl": 0.302734375, "learning_rate": 8.010397061025258e-07, "loss": 0.0030882488936185837, "memory(GiB)": 112.53, "reward": 1.6458333432674408, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6458333358168602, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2023, "train_speed(iter/s)": 0.029099 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 240.75, "completions/mean_length": 111.86458587646484, "completions/min_length": 56.25, "epoch": 3.016381236038719, "grad_norm": 1.0701830179389549, "kl": 0.31005859375, "learning_rate": 8.008508466803178e-07, "loss": 0.02150706946849823, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2024, "train_speed(iter/s)": 0.029102 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 181.0, "completions/mean_length": 108.97916984558105, "completions/min_length": 60.0, "epoch": 3.0178704393149665, "grad_norm": 0.08102225147376751, "kl": 0.3408203125, "learning_rate": 8.00661919952195e-07, "loss": 0.00034048405359499156, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2025, "train_speed(iter/s)": 0.029095 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 204.5, "completions/mean_length": 105.78125190734863, "completions/min_length": 59.0, "epoch": 3.0193596425912137, "grad_norm": 0.005613324130534122, "kl": 0.30712890625, "learning_rate": 8.004729259604235e-07, "loss": 0.0003068086807616055, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2026, "train_speed(iter/s)": 0.029088 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 249.75, "completions/mean_length": 118.34375381469727, "completions/min_length": 61.5, "epoch": 3.020848845867461, "grad_norm": 1.0847772399083788, "kl": 0.31005859375, "learning_rate": 8.002838647472849e-07, "loss": 0.022371094673871994, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833507180214, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2027, "train_speed(iter/s)": 0.02908 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 202.25, "completions/mean_length": 116.55208778381348, "completions/min_length": 63.25, "epoch": 3.022338049143708, "grad_norm": 0.5506283209555832, "kl": 0.66943359375, "learning_rate": 8.000947363550754e-07, "loss": -0.001063252449966967, "memory(GiB)": 112.53, "reward": 1.8958333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2028, "train_speed(iter/s)": 0.02908 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.5, "completions/mean_length": 120.91667175292969, "completions/min_length": 57.5, "epoch": 3.0238272524199554, "grad_norm": 0.005141538218842433, "kl": 0.28857421875, "learning_rate": 7.99905540826107e-07, "loss": 0.0002887993468903005, "memory(GiB)": 112.53, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2029, "train_speed(iter/s)": 0.02908 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.25, "completions/mean_length": 107.68750381469727, "completions/min_length": 63.5, "epoch": 3.0253164556962027, "grad_norm": 1.233948438812389, "kl": 0.3232421875, "learning_rate": 7.99716278202706e-07, "loss": -0.0010754888644441962, "memory(GiB)": 112.53, "reward": 1.8125000596046448, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.8125000298023224, "rewards/CineAccuracyORM/std": 0.3428337797522545, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2030, "train_speed(iter/s)": 0.029078 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.75, "completions/mean_length": 112.28125190734863, "completions/min_length": 56.0, "epoch": 3.02680565897245, "grad_norm": 0.005010040331675352, "kl": 0.2919921875, "learning_rate": 7.99526948527214e-07, "loss": 0.0002916083612944931, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2031, "train_speed(iter/s)": 0.029082 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.75, "completions/mean_length": 100.62500190734863, "completions/min_length": 52.75, "epoch": 3.028294862248697, "grad_norm": 0.00571154745219745, "kl": 0.33642578125, "learning_rate": 7.993375518419876e-07, "loss": 0.0003360170521773398, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2032, "train_speed(iter/s)": 0.029086 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.25, "completions/mean_length": 110.42708587646484, "completions/min_length": 50.75, "epoch": 3.029784065524944, "grad_norm": 0.8177294142494097, "kl": 0.29248046875, "learning_rate": 7.991480881893982e-07, "loss": -0.00507726613432169, "memory(GiB)": 112.53, "reward": 1.6979166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2033, "train_speed(iter/s)": 0.029082 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.5, "completions/mean_length": 110.78125190734863, "completions/min_length": 60.5, "epoch": 3.031273268801191, "grad_norm": 0.005320972948939378, "kl": 0.30419921875, "learning_rate": 7.989585576118326e-07, "loss": 0.0003046713536605239, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2034, "train_speed(iter/s)": 0.029082 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.5, "completions/mean_length": 100.88541984558105, "completions/min_length": 47.25, "epoch": 3.0327624720774384, "grad_norm": 0.6691236058655384, "kl": 0.31982421875, "learning_rate": 7.98768960151692e-07, "loss": -0.00896475464105606, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2035, "train_speed(iter/s)": 0.029082 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.25, "completions/mean_length": 105.30208587646484, "completions/min_length": 58.25, "epoch": 3.0342516753536857, "grad_norm": 0.005428415727864043, "kl": 0.32666015625, "learning_rate": 7.985792958513931e-07, "loss": 0.0003267470747232437, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2036, "train_speed(iter/s)": 0.029078 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.25, "completions/mean_length": 98.95833587646484, "completions/min_length": 55.25, "epoch": 3.035740878629933, "grad_norm": 0.005496855360366023, "kl": 0.30810546875, "learning_rate": 7.983895647533672e-07, "loss": 0.00030763688846491277, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2037, "train_speed(iter/s)": 0.029079 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.25, "completions/mean_length": 103.63541984558105, "completions/min_length": 52.75, "epoch": 3.03723008190618, "grad_norm": 0.005206889500535175, "kl": 0.3232421875, "learning_rate": 7.981997669000606e-07, "loss": 0.0003225817927159369, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2038, "train_speed(iter/s)": 0.029075 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 186.75, "completions/mean_length": 107.40625381469727, "completions/min_length": 61.0, "epoch": 3.0387192851824274, "grad_norm": 0.7498748537686104, "kl": 0.3046875, "learning_rate": 7.980099023339349e-07, "loss": -0.005694180727005005, "memory(GiB)": 112.53, "reward": 1.572916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5729166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2039, "train_speed(iter/s)": 0.029075 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.75, "completions/mean_length": 107.79166793823242, "completions/min_length": 59.5, "epoch": 3.0402084884586746, "grad_norm": 0.005759545152491577, "kl": 0.30078125, "learning_rate": 7.97819971097466e-07, "loss": 0.0003005192265845835, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2040, "train_speed(iter/s)": 0.029077 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.25, "completions/mean_length": 113.41666793823242, "completions/min_length": 53.75, "epoch": 3.041697691734922, "grad_norm": 0.005515565540401525, "kl": 0.2919921875, "learning_rate": 7.976299732331453e-07, "loss": 0.0002915941004175693, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2041, "train_speed(iter/s)": 0.029078 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 264.75, "completions/mean_length": 113.36458587646484, "completions/min_length": 56.0, "epoch": 3.043186895011169, "grad_norm": 0.00577428208222342, "kl": 0.28466796875, "learning_rate": 7.974399087834786e-07, "loss": 0.000284497975371778, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2042, "train_speed(iter/s)": 0.029077 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.75, "completions/mean_length": 104.52083587646484, "completions/min_length": 54.5, "epoch": 3.0446760982874164, "grad_norm": 1.481803064879494, "kl": 0.31884765625, "learning_rate": 7.97249777790987e-07, "loss": -0.01277773454785347, "memory(GiB)": 112.53, "reward": 1.541666716337204, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.5416666865348816, "rewards/CineAccuracyORM/std": 0.45633094012737274, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2043, "train_speed(iter/s)": 0.029073 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 252.0, "completions/mean_length": 107.81250190734863, "completions/min_length": 51.25, "epoch": 3.0461653015636636, "grad_norm": 0.0053836159051306115, "kl": 0.3173828125, "learning_rate": 7.970595802982063e-07, "loss": 0.0003176889440510422, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2044, "train_speed(iter/s)": 0.029066 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.0, "completions/mean_length": 102.98958587646484, "completions/min_length": 57.0, "epoch": 3.047654504839911, "grad_norm": 0.005668821336279388, "kl": 0.3466796875, "learning_rate": 7.968693163476871e-07, "loss": 0.0003465843328740448, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2045, "train_speed(iter/s)": 0.029063 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.0, "completions/mean_length": 104.19791984558105, "completions/min_length": 57.25, "epoch": 3.0491437081161576, "grad_norm": 1.1431940051487326, "kl": 0.33203125, "learning_rate": 7.966789859819951e-07, "loss": -0.010850155726075172, "memory(GiB)": 112.53, "reward": 1.8750000298023224, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.8750000149011612, "rewards/CineAccuracyORM/std": 0.2155592292547226, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2046, "train_speed(iter/s)": 0.029059 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 179.0, "completions/mean_length": 100.89583396911621, "completions/min_length": 52.0, "epoch": 3.050632911392405, "grad_norm": 0.005492439753024221, "kl": 0.330078125, "learning_rate": 7.964885892437108e-07, "loss": 0.000330126698827371, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2047, "train_speed(iter/s)": 0.02906 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.75, "completions/mean_length": 106.73958587646484, "completions/min_length": 60.25, "epoch": 3.052122114668652, "grad_norm": 0.558485280189712, "kl": 0.29150390625, "learning_rate": 7.962981261754294e-07, "loss": 0.007778177037835121, "memory(GiB)": 112.53, "reward": 1.8125000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2048, "train_speed(iter/s)": 0.029064 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.5, "completions/mean_length": 108.05208778381348, "completions/min_length": 47.0, "epoch": 3.0536113179448994, "grad_norm": 0.996238478080139, "kl": 0.31787109375, "learning_rate": 7.96107596819761e-07, "loss": 0.004693024326115847, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2049, "train_speed(iter/s)": 0.029057 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.75, "completions/mean_length": 105.72916984558105, "completions/min_length": 64.75, "epoch": 3.0551005212211466, "grad_norm": 0.004623892634029837, "kl": 0.2978515625, "learning_rate": 7.959170012193305e-07, "loss": 0.0002978578850161284, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2050, "train_speed(iter/s)": 0.029053 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.5, "completions/mean_length": 111.31250381469727, "completions/min_length": 56.0, "epoch": 3.056589724497394, "grad_norm": 1.6065075515435323, "kl": 0.31787109375, "learning_rate": 7.957263394167777e-07, "loss": 0.0017020662780851126, "memory(GiB)": 112.53, "reward": 1.6250000298023224, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.6250000074505806, "rewards/CineAccuracyORM/std": 0.36846019327640533, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2051, "train_speed(iter/s)": 0.029053 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.0, "completions/mean_length": 111.15625190734863, "completions/min_length": 65.75, "epoch": 3.058078927773641, "grad_norm": 0.6478951619100701, "kl": 0.3037109375, "learning_rate": 7.955356114547572e-07, "loss": 0.0021950264926999807, "memory(GiB)": 112.53, "reward": 1.6562500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500223517418, "rewards/CineAccuracyORM/std": 0.4121886007487774, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2052, "train_speed(iter/s)": 0.029057 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.0, "completions/mean_length": 102.98958587646484, "completions/min_length": 57.25, "epoch": 3.0595681310498883, "grad_norm": 0.9969490141525242, "kl": 0.33349609375, "learning_rate": 7.953448173759385e-07, "loss": -0.024128511548042297, "memory(GiB)": 112.53, "reward": 1.5937500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500074505806, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2053, "train_speed(iter/s)": 0.029058 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.0, "completions/mean_length": 110.29166984558105, "completions/min_length": 54.0, "epoch": 3.0610573343261356, "grad_norm": 1.4492151276849976, "kl": 0.3076171875, "learning_rate": 7.951539572230058e-07, "loss": -0.013623947277665138, "memory(GiB)": 112.53, "reward": 1.7395834028720856, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.739583358168602, "rewards/CineAccuracyORM/std": 0.42743058502674103, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2054, "train_speed(iter/s)": 0.029058 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 194.5, "completions/mean_length": 107.27083587646484, "completions/min_length": 61.5, "epoch": 3.062546537602383, "grad_norm": 0.004850374201468906, "kl": 0.30712890625, "learning_rate": 7.949630310386577e-07, "loss": 0.0003063329204451293, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2055, "train_speed(iter/s)": 0.029051 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 184.25, "completions/mean_length": 104.90625190734863, "completions/min_length": 53.25, "epoch": 3.06403574087863, "grad_norm": 0.005172186216850385, "kl": 0.29248046875, "learning_rate": 7.947720388656081e-07, "loss": 0.0002929574984591454, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2056, "train_speed(iter/s)": 0.029052 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.25, "completions/mean_length": 112.28125190734863, "completions/min_length": 59.75, "epoch": 3.0655249441548773, "grad_norm": 0.005109334552650048, "kl": 0.298828125, "learning_rate": 7.945809807465857e-07, "loss": 0.00029886752599850297, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2057, "train_speed(iter/s)": 0.029056 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 250.0, "completions/mean_length": 109.69791984558105, "completions/min_length": 47.5, "epoch": 3.0670141474311245, "grad_norm": 0.0054248499969121525, "kl": 0.32177734375, "learning_rate": 7.943898567243335e-07, "loss": 0.0003215912729501724, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2058, "train_speed(iter/s)": 0.029051 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.0, "completions/mean_length": 111.76041984558105, "completions/min_length": 64.0, "epoch": 3.0685033507073713, "grad_norm": 0.8664548368702572, "kl": 0.30615234375, "learning_rate": 7.941986668416096e-07, "loss": 0.003478096332401037, "memory(GiB)": 112.53, "reward": 1.822916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2059, "train_speed(iter/s)": 0.029047 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.25, "completions/mean_length": 118.25000381469727, "completions/min_length": 61.0, "epoch": 3.0699925539836186, "grad_norm": 0.0058187797316728036, "kl": 0.30419921875, "learning_rate": 7.940074111411868e-07, "loss": 0.00030388229060918093, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2060, "train_speed(iter/s)": 0.029047 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.5, "completions/mean_length": 108.29166984558105, "completions/min_length": 61.5, "epoch": 3.071481757259866, "grad_norm": 0.8166038772767964, "kl": 0.32421875, "learning_rate": 7.938160896658524e-07, "loss": -0.014979755505919456, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2061, "train_speed(iter/s)": 0.029051 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.0, "completions/mean_length": 107.55208587646484, "completions/min_length": 51.5, "epoch": 3.072970960536113, "grad_norm": 0.004996570169323588, "kl": 0.31787109375, "learning_rate": 7.936247024584087e-07, "loss": 0.00031846354249864817, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2062, "train_speed(iter/s)": 0.029047 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.75, "completions/mean_length": 115.55208396911621, "completions/min_length": 66.25, "epoch": 3.0744601638123603, "grad_norm": 0.06066368896898681, "kl": 0.322265625, "learning_rate": 7.934332495616722e-07, "loss": 0.000322139443596825, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2063, "train_speed(iter/s)": 0.029047 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 244.0, "completions/mean_length": 118.5625057220459, "completions/min_length": 49.5, "epoch": 3.0759493670886076, "grad_norm": 0.005512230362098219, "kl": 0.2998046875, "learning_rate": 7.932417310184751e-07, "loss": 0.0002991034125443548, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2064, "train_speed(iter/s)": 0.029039 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.75, "completions/mean_length": 111.05208778381348, "completions/min_length": 53.75, "epoch": 3.077438570364855, "grad_norm": 0.7492192623762188, "kl": 0.31298828125, "learning_rate": 7.93050146871663e-07, "loss": 0.017798058688640594, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2065, "train_speed(iter/s)": 0.029043 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.5, "completions/mean_length": 107.43750190734863, "completions/min_length": 41.0, "epoch": 3.078927773641102, "grad_norm": 0.005697324755215116, "kl": 0.310546875, "learning_rate": 7.928584971640974e-07, "loss": 0.0003102028858847916, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2066, "train_speed(iter/s)": 0.02904 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.0, "completions/mean_length": 116.48958587646484, "completions/min_length": 53.25, "epoch": 3.0804169769173493, "grad_norm": 0.22710185038747166, "kl": 0.3505859375, "learning_rate": 7.926667819386535e-07, "loss": 0.0003505250788293779, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2067, "train_speed(iter/s)": 0.02904 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.5, "completions/mean_length": 108.61458587646484, "completions/min_length": 57.25, "epoch": 3.0819061801935965, "grad_norm": 0.004515750981037688, "kl": 0.30712890625, "learning_rate": 7.924750012382218e-07, "loss": 0.00030675213201902807, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2068, "train_speed(iter/s)": 0.029039 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.25, "completions/mean_length": 118.86458778381348, "completions/min_length": 59.5, "epoch": 3.0833953834698438, "grad_norm": 0.005012884636099922, "kl": 0.2958984375, "learning_rate": 7.922831551057068e-07, "loss": 0.0002955901436507702, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2069, "train_speed(iter/s)": 0.029031 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 236.5, "completions/mean_length": 125.40625190734863, "completions/min_length": 64.0, "epoch": 3.084884586746091, "grad_norm": 1.1022032108817814, "kl": 0.28662109375, "learning_rate": 7.920912435840286e-07, "loss": 0.021473636850714684, "memory(GiB)": 112.53, "reward": 1.7187500596046448, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.7187500223517418, "rewards/CineAccuracyORM/std": 0.36238520964980125, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2070, "train_speed(iter/s)": 0.029034 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.5, "completions/mean_length": 112.11458587646484, "completions/min_length": 56.0, "epoch": 3.0863737900223382, "grad_norm": 0.005645882307491906, "kl": 0.298828125, "learning_rate": 7.918992667161208e-07, "loss": 0.00029903833637945354, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2071, "train_speed(iter/s)": 0.029038 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.25, "completions/mean_length": 117.38542175292969, "completions/min_length": 64.5, "epoch": 3.087862993298585, "grad_norm": 0.9623250081954933, "kl": 0.2890625, "learning_rate": 7.917072245449325e-07, "loss": 0.008464265614748001, "memory(GiB)": 112.53, "reward": 1.7187500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500074505806, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2072, "train_speed(iter/s)": 0.029033 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 229.25, "completions/mean_length": 114.28125381469727, "completions/min_length": 45.25, "epoch": 3.0893521965748323, "grad_norm": 0.8988855601523471, "kl": 0.298828125, "learning_rate": 7.915151171134273e-07, "loss": 0.0016866180812940001, "memory(GiB)": 112.53, "reward": 1.6354166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6354166716337204, "rewards/CineAccuracyORM/std": 0.20484384894371033, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2073, "train_speed(iter/s)": 0.029037 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 278.0, "completions/mean_length": 113.31250381469727, "completions/min_length": 60.0, "epoch": 3.0908413998510795, "grad_norm": 1.0792498669902313, "kl": 0.29638671875, "learning_rate": 7.913229444645828e-07, "loss": 0.006169642321765423, "memory(GiB)": 112.53, "reward": 1.9791666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.9791666716337204, "rewards/CineAccuracyORM/std": 0.07058246433734894, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2074, "train_speed(iter/s)": 0.029035 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 238.0, "completions/mean_length": 118.89583587646484, "completions/min_length": 61.5, "epoch": 3.0923306031273268, "grad_norm": 0.004769663036150253, "kl": 0.29736328125, "learning_rate": 7.911307066413918e-07, "loss": 0.0002967490581795573, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2075, "train_speed(iter/s)": 0.029028 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 225.5, "completions/mean_length": 117.08333778381348, "completions/min_length": 49.75, "epoch": 3.093819806403574, "grad_norm": 1.2902353290725104, "kl": 0.3134765625, "learning_rate": 7.909384036868613e-07, "loss": -0.009622471407055855, "memory(GiB)": 112.53, "reward": 1.6875000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000074505806, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2076, "train_speed(iter/s)": 0.029025 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.75, "completions/mean_length": 110.13541793823242, "completions/min_length": 55.75, "epoch": 3.0953090096798213, "grad_norm": 0.005971781892303238, "kl": 0.31494140625, "learning_rate": 7.907460356440134e-07, "loss": 0.0003148607793264091, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2077, "train_speed(iter/s)": 0.029025 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 231.75, "completions/mean_length": 112.70833778381348, "completions/min_length": 60.5, "epoch": 3.0967982129560685, "grad_norm": 0.5430273018220655, "kl": 0.32373046875, "learning_rate": 7.905536025558838e-07, "loss": 0.008266872726380825, "memory(GiB)": 112.53, "reward": 1.9270833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9270833432674408, "rewards/CineAccuracyORM/std": 0.11607640981674194, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2078, "train_speed(iter/s)": 0.029017 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 268.0, "completions/mean_length": 116.25000190734863, "completions/min_length": 58.25, "epoch": 3.0982874162323157, "grad_norm": 0.004622269343776054, "kl": 0.2998046875, "learning_rate": 7.90361104465524e-07, "loss": 0.0002995036484207958, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2079, "train_speed(iter/s)": 0.029013 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 258.0, "completions/mean_length": 116.22917175292969, "completions/min_length": 63.0, "epoch": 3.099776619508563, "grad_norm": 0.00513227260722488, "kl": 0.27880859375, "learning_rate": 7.901685414159989e-07, "loss": 0.0002782201918307692, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2080, "train_speed(iter/s)": 0.029014 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 259.75, "completions/mean_length": 106.92708587646484, "completions/min_length": 51.5, "epoch": 3.1012658227848102, "grad_norm": 0.005154490558647862, "kl": 0.3134765625, "learning_rate": 7.899759134503887e-07, "loss": 0.00031309504993259907, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2081, "train_speed(iter/s)": 0.029018 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 229.25, "completions/mean_length": 118.90625190734863, "completions/min_length": 57.75, "epoch": 3.1027550260610575, "grad_norm": 0.004846467501728558, "kl": 0.2900390625, "learning_rate": 7.897832206117878e-07, "loss": 0.00028983992524445057, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2082, "train_speed(iter/s)": 0.029017 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 234.0, "completions/mean_length": 111.62500190734863, "completions/min_length": 47.75, "epoch": 3.1042442293373047, "grad_norm": 0.006323454748578941, "kl": 0.30029296875, "learning_rate": 7.895904629433051e-07, "loss": 0.00030052303918637335, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2083, "train_speed(iter/s)": 0.029014 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 244.25, "completions/mean_length": 111.04167175292969, "completions/min_length": 52.0, "epoch": 3.105733432613552, "grad_norm": 0.005740202756111609, "kl": 0.29638671875, "learning_rate": 7.893976404880641e-07, "loss": 0.00029638042906299233, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2084, "train_speed(iter/s)": 0.029013 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 223.5, "completions/mean_length": 109.83333778381348, "completions/min_length": 57.5, "epoch": 3.1072226358897987, "grad_norm": 0.6732271896253763, "kl": 0.3056640625, "learning_rate": 7.892047532892028e-07, "loss": -0.0011635938426479697, "memory(GiB)": 112.53, "reward": 1.6770833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833358168602, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2085, "train_speed(iter/s)": 0.029013 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 264.75, "completions/mean_length": 125.55208587646484, "completions/min_length": 61.75, "epoch": 3.108711839166046, "grad_norm": 0.005767704814449158, "kl": 0.27734375, "learning_rate": 7.890118013898734e-07, "loss": 0.00027781989774666727, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2086, "train_speed(iter/s)": 0.029012 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 263.75, "completions/mean_length": 110.00000190734863, "completions/min_length": 50.0, "epoch": 3.1102010424422932, "grad_norm": 1.2031156694361371, "kl": 0.3115234375, "learning_rate": 7.888187848332433e-07, "loss": -0.012057190760970116, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.06803862750530243, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2087, "train_speed(iter/s)": 0.029015 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.25, "completions/mean_length": 100.50000381469727, "completions/min_length": 37.5, "epoch": 3.1116902457185405, "grad_norm": 0.005254876965845898, "kl": 0.314453125, "learning_rate": 7.886257036624935e-07, "loss": 0.0003147800453007221, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2088, "train_speed(iter/s)": 0.029015 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.0, "completions/mean_length": 105.77083587646484, "completions/min_length": 52.25, "epoch": 3.1131794489947877, "grad_norm": 0.6951439437141401, "kl": 0.3447265625, "learning_rate": 7.8843255792082e-07, "loss": 0.012758122757077217, "memory(GiB)": 112.53, "reward": 1.8645833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8645833432674408, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2089, "train_speed(iter/s)": 0.02901 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 257.75, "completions/mean_length": 122.02083587646484, "completions/min_length": 56.0, "epoch": 3.114668652271035, "grad_norm": 0.005130893120277219, "kl": 0.2890625, "learning_rate": 7.882393476514329e-07, "loss": 0.0002888884337153286, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2090, "train_speed(iter/s)": 0.02901 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.75, "completions/mean_length": 112.82291793823242, "completions/min_length": 62.5, "epoch": 3.116157855547282, "grad_norm": 0.004847704396811757, "kl": 0.30419921875, "learning_rate": 7.88046072897557e-07, "loss": 0.00030438369140028954, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2091, "train_speed(iter/s)": 0.02901 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 223.0, "completions/mean_length": 112.01041793823242, "completions/min_length": 50.25, "epoch": 3.1176470588235294, "grad_norm": 0.005449884049861737, "kl": 0.306640625, "learning_rate": 7.878527337024317e-07, "loss": 0.00030633568530902267, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2092, "train_speed(iter/s)": 0.029002 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 223.0, "completions/mean_length": 103.70833778381348, "completions/min_length": 49.0, "epoch": 3.1191362620997767, "grad_norm": 0.6013922209983384, "kl": 0.31591796875, "learning_rate": 7.876593301093103e-07, "loss": 0.01009318046271801, "memory(GiB)": 112.53, "reward": 1.5937500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500074505806, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2093, "train_speed(iter/s)": 0.029001 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.75, "completions/mean_length": 102.73958778381348, "completions/min_length": 52.0, "epoch": 3.120625465376024, "grad_norm": 0.005525809568024022, "kl": 0.31494140625, "learning_rate": 7.874658621614607e-07, "loss": 0.0003152007411699742, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2094, "train_speed(iter/s)": 0.028998 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 247.5, "completions/mean_length": 111.54167175292969, "completions/min_length": 46.25, "epoch": 3.122114668652271, "grad_norm": 0.5935315124711135, "kl": 0.326171875, "learning_rate": 7.872723299021655e-07, "loss": 0.01234535500407219, "memory(GiB)": 112.53, "reward": 1.5312500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.5312500149011612, "rewards/CineAccuracyORM/std": 0.464870348572731, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2095, "train_speed(iter/s)": 0.028993 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 231.0, "completions/mean_length": 105.69792175292969, "completions/min_length": 55.5, "epoch": 3.1236038719285184, "grad_norm": 0.00575475385530091, "kl": 0.31689453125, "learning_rate": 7.870787333747214e-07, "loss": 0.0003169587580487132, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2096, "train_speed(iter/s)": 0.028989 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 237.25, "completions/mean_length": 116.86458587646484, "completions/min_length": 56.75, "epoch": 3.1250930752047656, "grad_norm": 0.005212391797744821, "kl": 0.29638671875, "learning_rate": 7.868850726224395e-07, "loss": 0.00029604346491396427, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2097, "train_speed(iter/s)": 0.028985 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 252.0, "completions/mean_length": 109.77083778381348, "completions/min_length": 50.5, "epoch": 3.1265822784810124, "grad_norm": 1.352952389975388, "kl": 0.2998046875, "learning_rate": 7.866913476886452e-07, "loss": 0.009020565077662468, "memory(GiB)": 112.53, "reward": 1.604166716337204, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.6041666939854622, "rewards/CineAccuracyORM/std": 0.47292453050613403, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2098, "train_speed(iter/s)": 0.028985 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 247.5, "completions/mean_length": 113.71875381469727, "completions/min_length": 47.75, "epoch": 3.1280714817572597, "grad_norm": 0.006080764121087417, "kl": 0.3046875, "learning_rate": 7.864975586166787e-07, "loss": 0.00030456221429631114, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2099, "train_speed(iter/s)": 0.028988 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 191.5, "completions/mean_length": 108.52083587646484, "completions/min_length": 44.25, "epoch": 3.129560685033507, "grad_norm": 1.710277161166798, "kl": 0.32421875, "learning_rate": 7.863037054498937e-07, "loss": 0.02784411981701851, "memory(GiB)": 112.53, "reward": 1.6145833730697632, "reward_std": 0.102054663002491, "rewards/CineAccuracyORM/mean": 0.6145833507180214, "rewards/CineAccuracyORM/std": 0.48734044283628464, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2100, "train_speed(iter/s)": 0.028981 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 214.0, "completions/mean_length": 108.41666984558105, "completions/min_length": 50.75, "epoch": 3.131049888309754, "grad_norm": 0.005494226017909824, "kl": 0.30517578125, "learning_rate": 7.86109788231659e-07, "loss": 0.00030464123119600117, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2101, "train_speed(iter/s)": 0.028977 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 202.75, "completions/mean_length": 103.39583587646484, "completions/min_length": 53.25, "epoch": 3.1325390915860014, "grad_norm": 0.0055216904667452415, "kl": 0.3193359375, "learning_rate": 7.859158070053576e-07, "loss": 0.0003190429415553808, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2102, "train_speed(iter/s)": 0.028977 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 202.0, "completions/mean_length": 108.20833587646484, "completions/min_length": 48.75, "epoch": 3.1340282948622487, "grad_norm": 0.6686988547578141, "kl": 0.306640625, "learning_rate": 7.857217618143866e-07, "loss": -0.005341155454516411, "memory(GiB)": 112.53, "reward": 1.6770834028720856, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.677083358168602, "rewards/CineAccuracyORM/std": 0.47723397612571716, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2103, "train_speed(iter/s)": 0.028982 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 253.5, "completions/mean_length": 108.89583587646484, "completions/min_length": 50.5, "epoch": 3.135517498138496, "grad_norm": 1.6192838622304369, "kl": 0.30224609375, "learning_rate": 7.855276527021575e-07, "loss": 0.006841020192950964, "memory(GiB)": 112.53, "reward": 1.90625, "reward_std": 0.0876726359128952, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.12363383919000626, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2104, "train_speed(iter/s)": 0.028985 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 267.0, "completions/mean_length": 112.97916984558105, "completions/min_length": 55.75, "epoch": 3.137006701414743, "grad_norm": 0.005934055511876659, "kl": 0.306640625, "learning_rate": 7.853334797120961e-07, "loss": 0.00030671266722492874, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2105, "train_speed(iter/s)": 0.028976 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.5, "completions/mean_length": 104.51041984558105, "completions/min_length": 46.75, "epoch": 3.1384959046909904, "grad_norm": 0.006166658851345654, "kl": 0.32861328125, "learning_rate": 7.851392428876425e-07, "loss": 0.00032845610985532403, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2106, "train_speed(iter/s)": 0.02898 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 237.5, "completions/mean_length": 118.81250190734863, "completions/min_length": 51.25, "epoch": 3.1399851079672376, "grad_norm": 0.6907182599772053, "kl": 0.2900390625, "learning_rate": 7.849449422722512e-07, "loss": -0.0013260599225759506, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2107, "train_speed(iter/s)": 0.028983 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 232.25, "completions/mean_length": 107.34375381469727, "completions/min_length": 54.25, "epoch": 3.141474311243485, "grad_norm": 1.0242129500288941, "kl": 0.32421875, "learning_rate": 7.847505779093906e-07, "loss": -0.005923469550907612, "memory(GiB)": 112.53, "reward": 1.6562500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500149011612, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2108, "train_speed(iter/s)": 0.028983 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 208.5, "completions/mean_length": 106.50000381469727, "completions/min_length": 55.25, "epoch": 3.142963514519732, "grad_norm": 0.005028849154491949, "kl": 0.32470703125, "learning_rate": 7.845561498425439e-07, "loss": 0.0003248784632887691, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2109, "train_speed(iter/s)": 0.02898 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 258.75, "completions/mean_length": 113.44792175292969, "completions/min_length": 55.0, "epoch": 3.1444527177959793, "grad_norm": 0.7181328288666752, "kl": 0.3037109375, "learning_rate": 7.843616581152079e-07, "loss": 0.005641627125442028, "memory(GiB)": 112.53, "reward": 1.354166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.354166679084301, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2110, "train_speed(iter/s)": 0.028982 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.0, "completions/mean_length": 105.82291984558105, "completions/min_length": 54.25, "epoch": 3.145941921072226, "grad_norm": 0.005479909291384372, "kl": 0.318359375, "learning_rate": 7.841671027708944e-07, "loss": 0.0003180011117365211, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2111, "train_speed(iter/s)": 0.028986 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 223.75, "completions/mean_length": 111.84375190734863, "completions/min_length": 54.5, "epoch": 3.1474311243484734, "grad_norm": 0.005377358927594323, "kl": 0.3046875, "learning_rate": 7.839724838531288e-07, "loss": 0.00030431398772634566, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2112, "train_speed(iter/s)": 0.02899 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.75, "completions/mean_length": 105.57291984558105, "completions/min_length": 53.75, "epoch": 3.1489203276247206, "grad_norm": 1.0452597366967744, "kl": 0.33935546875, "learning_rate": 7.83777801405451e-07, "loss": 0.01685657911002636, "memory(GiB)": 112.53, "reward": 1.854166716337204, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.8541666865348816, "rewards/CineAccuracyORM/std": 0.27512967213988304, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2113, "train_speed(iter/s)": 0.028994 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 206.5, "completions/mean_length": 104.22916793823242, "completions/min_length": 49.5, "epoch": 3.150409530900968, "grad_norm": 1.1881507796466186, "kl": 0.3134765625, "learning_rate": 7.835830554714153e-07, "loss": 0.013324066996574402, "memory(GiB)": 112.53, "reward": 1.6979166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2114, "train_speed(iter/s)": 0.028987 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 232.75, "completions/mean_length": 108.57291984558105, "completions/min_length": 54.0, "epoch": 3.151898734177215, "grad_norm": 0.9649123868286525, "kl": 0.29931640625, "learning_rate": 7.833882460945896e-07, "loss": -0.0013415254652500153, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.08625819534063339, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.40968769788742065, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2115, "train_speed(iter/s)": 0.028982 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 181.75, "completions/mean_length": 103.40625190734863, "completions/min_length": 55.25, "epoch": 3.1533879374534624, "grad_norm": 0.005054724644053917, "kl": 0.3154296875, "learning_rate": 7.831933733185565e-07, "loss": 0.00031594655592925847, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2116, "train_speed(iter/s)": 0.028986 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.25, "completions/mean_length": 101.33333587646484, "completions/min_length": 48.5, "epoch": 3.1548771407297096, "grad_norm": 0.7769894677098546, "kl": 0.37548828125, "learning_rate": 7.829984371869129e-07, "loss": -0.011268715374171734, "memory(GiB)": 112.53, "reward": 1.7604166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.760416679084301, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2117, "train_speed(iter/s)": 0.028979 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 186.75, "completions/mean_length": 101.44791984558105, "completions/min_length": 49.5, "epoch": 3.156366344005957, "grad_norm": 1.0231953617326788, "kl": 0.34423828125, "learning_rate": 7.828034377432693e-07, "loss": -0.00890280306339264, "memory(GiB)": 112.53, "reward": 1.635416716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.635416679084301, "rewards/CineAccuracyORM/std": 0.4456155598163605, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2118, "train_speed(iter/s)": 0.028984 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.25, "completions/mean_length": 116.73958778381348, "completions/min_length": 49.5, "epoch": 3.157855547282204, "grad_norm": 0.006231634982505104, "kl": 0.29638671875, "learning_rate": 7.826083750312509e-07, "loss": 0.0002964124141726643, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2119, "train_speed(iter/s)": 0.028987 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 309.0, "completions/mean_length": 113.87500190734863, "completions/min_length": 50.0, "epoch": 3.1593447505584513, "grad_norm": 0.6529102162481704, "kl": 0.3310546875, "learning_rate": 7.824132490944967e-07, "loss": -0.002066422486677766, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.822916679084301, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2120, "train_speed(iter/s)": 0.028987 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 237.25, "completions/mean_length": 117.82291984558105, "completions/min_length": 63.25, "epoch": 3.1608339538346986, "grad_norm": 0.016761528263254024, "kl": 0.30126953125, "learning_rate": 7.822180599766601e-07, "loss": 0.00030109938234090805, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2121, "train_speed(iter/s)": 0.02899 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 267.0, "completions/mean_length": 118.03125190734863, "completions/min_length": 53.5, "epoch": 3.162323157110946, "grad_norm": 0.4269042056492043, "kl": 0.30810546875, "learning_rate": 7.820228077214086e-07, "loss": -0.013828897848725319, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2122, "train_speed(iter/s)": 0.028988 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.5, "completions/mean_length": 116.43750381469727, "completions/min_length": 48.0, "epoch": 3.163812360387193, "grad_norm": 1.1304360827309747, "kl": 0.3037109375, "learning_rate": 7.818274923724235e-07, "loss": 0.002090438036248088, "memory(GiB)": 112.53, "reward": 1.7604166865348816, "reward_std": 0.0876726396381855, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.3325323313474655, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2123, "train_speed(iter/s)": 0.028984 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 246.5, "completions/mean_length": 108.56250381469727, "completions/min_length": 52.25, "epoch": 3.16530156366344, "grad_norm": 0.5678516905817691, "kl": 0.306396484375, "learning_rate": 7.816321139734006e-07, "loss": -0.011640055105090141, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2124, "train_speed(iter/s)": 0.028986 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 267.5, "completions/mean_length": 109.10416793823242, "completions/min_length": 51.75, "epoch": 3.166790766939687, "grad_norm": 0.7852214481437721, "kl": 0.32470703125, "learning_rate": 7.814366725680499e-07, "loss": 0.009630704298615456, "memory(GiB)": 112.53, "reward": 1.6875000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000074505806, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2125, "train_speed(iter/s)": 0.028985 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 250.0, "completions/mean_length": 114.91666984558105, "completions/min_length": 56.0, "epoch": 3.1682799702159343, "grad_norm": 0.005277486913375755, "kl": 0.31005859375, "learning_rate": 7.812411682000948e-07, "loss": 0.0003104521310888231, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2126, "train_speed(iter/s)": 0.028985 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.25, "completions/mean_length": 115.46875381469727, "completions/min_length": 47.75, "epoch": 3.1697691734921816, "grad_norm": 0.005581352282192306, "kl": 0.30859375, "learning_rate": 7.810456009132737e-07, "loss": 0.00030842816340737045, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2127, "train_speed(iter/s)": 0.028984 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 253.75, "completions/mean_length": 109.71875381469727, "completions/min_length": 50.25, "epoch": 3.171258376768429, "grad_norm": 1.0311059809909322, "kl": 0.29833984375, "learning_rate": 7.808499707513385e-07, "loss": 0.030971815809607506, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2128, "train_speed(iter/s)": 0.028987 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 330.75, "completions/mean_length": 123.17708396911621, "completions/min_length": 53.0, "epoch": 3.172747580044676, "grad_norm": 0.005441209237683876, "kl": 0.2939453125, "learning_rate": 7.806542777580549e-07, "loss": 0.0002935033990070224, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2129, "train_speed(iter/s)": 0.028986 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 247.0, "completions/mean_length": 116.04166984558105, "completions/min_length": 54.25, "epoch": 3.1742367833209233, "grad_norm": 1.195855641950985, "kl": 0.32080078125, "learning_rate": 7.804585219772036e-07, "loss": -2.9053695470793173e-05, "memory(GiB)": 112.53, "reward": 1.4687500298023224, "reward_std": 0.10205466859042645, "rewards/CineAccuracyORM/mean": 0.4687500149011612, "rewards/CineAccuracyORM/std": 0.4595789238810539, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2130, "train_speed(iter/s)": 0.028981 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 196.25, "completions/mean_length": 116.68750381469727, "completions/min_length": 57.5, "epoch": 3.1757259865971705, "grad_norm": 0.004920953118285916, "kl": 0.302734375, "learning_rate": 7.802627034525786e-07, "loss": 0.00030282579245977104, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2131, "train_speed(iter/s)": 0.028974 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.5, "completions/mean_length": 115.57291793823242, "completions/min_length": 57.5, "epoch": 3.1772151898734178, "grad_norm": 1.136423961288108, "kl": 0.30322265625, "learning_rate": 7.80066822227988e-07, "loss": 0.018266156315803528, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.3676535338163376, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2132, "train_speed(iter/s)": 0.028975 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.5, "completions/mean_length": 117.39583778381348, "completions/min_length": 46.0, "epoch": 3.178704393149665, "grad_norm": 0.00539694270484326, "kl": 0.29052734375, "learning_rate": 7.798708783472542e-07, "loss": 0.000290693249553442, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2133, "train_speed(iter/s)": 0.028978 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 208.5, "completions/mean_length": 106.77083587646484, "completions/min_length": 59.25, "epoch": 3.1801935964259123, "grad_norm": 0.8943441852130518, "kl": 0.31982421875, "learning_rate": 7.796748718542137e-07, "loss": 0.013771388679742813, "memory(GiB)": 112.53, "reward": 1.8125000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2134, "train_speed(iter/s)": 0.028982 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 202.75, "completions/mean_length": 106.83333587646484, "completions/min_length": 57.0, "epoch": 3.1816827997021595, "grad_norm": 1.0719177509598294, "kl": 0.32421875, "learning_rate": 7.794788027927164e-07, "loss": -0.008688249625265598, "memory(GiB)": 112.53, "reward": 1.8125000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000074505806, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2135, "train_speed(iter/s)": 0.028982 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.5, "completions/mean_length": 105.63541984558105, "completions/min_length": 49.0, "epoch": 3.1831720029784067, "grad_norm": 1.0791068162585895, "kl": 0.31640625, "learning_rate": 7.792826712066266e-07, "loss": -0.014098234474658966, "memory(GiB)": 112.53, "reward": 1.8125000298023224, "reward_std": 0.08625819534063339, "rewards/CineAccuracyORM/mean": 0.8125000074505806, "rewards/CineAccuracyORM/std": 0.23095712065696716, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2136, "train_speed(iter/s)": 0.028978 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 191.75, "completions/mean_length": 107.41666793823242, "completions/min_length": 48.75, "epoch": 3.1846612062546535, "grad_norm": 0.005551910828672146, "kl": 0.296875, "learning_rate": 7.790864771398229e-07, "loss": 0.0002966269094031304, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2137, "train_speed(iter/s)": 0.028979 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 191.5, "completions/mean_length": 118.21875190734863, "completions/min_length": 58.25, "epoch": 3.186150409530901, "grad_norm": 1.079836433147437, "kl": 0.28125, "learning_rate": 7.788902206361973e-07, "loss": -0.006381993182003498, "memory(GiB)": 112.53, "reward": 1.541666716337204, "reward_std": 0.1034691073000431, "rewards/CineAccuracyORM/mean": 0.5416666828095913, "rewards/CineAccuracyORM/std": 0.4552694782614708, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2138, "train_speed(iter/s)": 0.028979 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 201.75, "completions/mean_length": 106.21875381469727, "completions/min_length": 56.5, "epoch": 3.187639612807148, "grad_norm": 0.9089489280788267, "kl": 0.32177734375, "learning_rate": 7.78693901739656e-07, "loss": -0.01245836727321148, "memory(GiB)": 112.53, "reward": 1.6145833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6145833432674408, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2139, "train_speed(iter/s)": 0.028976 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.5, "completions/mean_length": 111.36458587646484, "completions/min_length": 55.5, "epoch": 3.1891288160833953, "grad_norm": 0.7707040497376049, "kl": 0.31103515625, "learning_rate": 7.784975204941194e-07, "loss": -0.005659180693328381, "memory(GiB)": 112.53, "reward": 1.8541666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2140, "train_speed(iter/s)": 0.028972 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 257.5, "completions/mean_length": 121.25000381469727, "completions/min_length": 58.75, "epoch": 3.1906180193596425, "grad_norm": 0.5334432469778609, "kl": 0.2919921875, "learning_rate": 7.783010769435214e-07, "loss": -0.006397592835128307, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2141, "train_speed(iter/s)": 0.028975 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.0, "completions/mean_length": 127.77083778381348, "completions/min_length": 54.75, "epoch": 3.1921072226358898, "grad_norm": 0.9846895395301741, "kl": 0.288818359375, "learning_rate": 7.781045711318102e-07, "loss": 0.04224470257759094, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.3307463824748993, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2142, "train_speed(iter/s)": 0.028973 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.75, "completions/mean_length": 106.20833778381348, "completions/min_length": 54.0, "epoch": 3.193596425912137, "grad_norm": 0.0047891606420222485, "kl": 0.32763671875, "learning_rate": 7.779080031029478e-07, "loss": 0.00032734923297539353, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2143, "train_speed(iter/s)": 0.028975 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.25, "completions/mean_length": 117.57291984558105, "completions/min_length": 61.5, "epoch": 3.1950856291883842, "grad_norm": 1.1588577250907743, "kl": 0.3056640625, "learning_rate": 7.777113729009099e-07, "loss": -0.000413104280596599, "memory(GiB)": 112.53, "reward": 1.6145833432674408, "reward_std": 0.11258216388523579, "rewards/CineAccuracyORM/mean": 0.6145833432674408, "rewards/CineAccuracyORM/std": 0.32938867807388306, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2144, "train_speed(iter/s)": 0.028972 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.25, "completions/mean_length": 107.59375190734863, "completions/min_length": 49.5, "epoch": 3.1965748324646315, "grad_norm": 0.00549490633425398, "kl": 0.3271484375, "learning_rate": 7.775146805696866e-07, "loss": 0.0003272255416959524, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2145, "train_speed(iter/s)": 0.028975 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 258.5, "completions/mean_length": 125.61458778381348, "completions/min_length": 56.75, "epoch": 3.1980640357408787, "grad_norm": 1.01121446554433, "kl": 0.28466796875, "learning_rate": 7.773179261532816e-07, "loss": 0.00826282612979412, "memory(GiB)": 112.53, "reward": 1.7187500596046448, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.7187500149011612, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2146, "train_speed(iter/s)": 0.028971 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.75, "completions/mean_length": 115.44791984558105, "completions/min_length": 53.75, "epoch": 3.199553239017126, "grad_norm": 0.004885969998979254, "kl": 0.3046875, "learning_rate": 7.771211096957124e-07, "loss": 0.0003045277262572199, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2147, "train_speed(iter/s)": 0.028967 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.0, "completions/mean_length": 112.04166793823242, "completions/min_length": 60.5, "epoch": 3.201042442293373, "grad_norm": 1.1155510637787975, "kl": 0.31982421875, "learning_rate": 7.769242312410104e-07, "loss": 0.005795185919851065, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.802083358168602, "rewards/CineAccuracyORM/std": 0.2973194234073162, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2148, "train_speed(iter/s)": 0.028971 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 246.25, "completions/mean_length": 117.32291793823242, "completions/min_length": 57.75, "epoch": 3.2025316455696204, "grad_norm": 0.5232467136085039, "kl": 0.2998046875, "learning_rate": 7.767272908332212e-07, "loss": -0.010085087269544601, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2149, "train_speed(iter/s)": 0.02897 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 342.75, "completions/mean_length": 124.67708587646484, "completions/min_length": 45.0, "epoch": 3.2040208488458672, "grad_norm": 0.005363174299818049, "kl": 0.29296875, "learning_rate": 7.765302885164038e-07, "loss": 0.00029314530547708273, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2150, "train_speed(iter/s)": 0.028965 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.75, "completions/mean_length": 112.15625190734863, "completions/min_length": 49.0, "epoch": 3.2055100521221145, "grad_norm": 0.6856073587569106, "kl": 0.32177734375, "learning_rate": 7.763332243346314e-07, "loss": -0.011809095740318298, "memory(GiB)": 112.53, "reward": 1.6145833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6145833432674408, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2151, "train_speed(iter/s)": 0.028957 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.75, "completions/mean_length": 111.65625381469727, "completions/min_length": 48.25, "epoch": 3.2069992553983617, "grad_norm": 1.1646711664428442, "kl": 0.330078125, "learning_rate": 7.761360983319909e-07, "loss": -0.013644258491694927, "memory(GiB)": 112.53, "reward": 1.6458333730697632, "reward_std": 0.08625819534063339, "rewards/CineAccuracyORM/mean": 0.6458333432674408, "rewards/CineAccuracyORM/std": 0.3085566312074661, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2152, "train_speed(iter/s)": 0.028952 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.25, "completions/mean_length": 119.79166984558105, "completions/min_length": 53.0, "epoch": 3.208488458674609, "grad_norm": 0.0050697743347287, "kl": 0.2978515625, "learning_rate": 7.759389105525831e-07, "loss": 0.0002980809658765793, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2153, "train_speed(iter/s)": 0.028952 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.0, "completions/mean_length": 108.13541984558105, "completions/min_length": 56.5, "epoch": 3.209977661950856, "grad_norm": 0.5714854293804168, "kl": 0.3046875, "learning_rate": 7.757416610405225e-07, "loss": -0.006353714503347874, "memory(GiB)": 112.53, "reward": 1.8854166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.20484384894371033, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2154, "train_speed(iter/s)": 0.028953 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.75, "completions/mean_length": 120.12500381469727, "completions/min_length": 55.5, "epoch": 3.2114668652271035, "grad_norm": 1.7016436876323306, "kl": 0.298828125, "learning_rate": 7.755443498399373e-07, "loss": 0.006386882625520229, "memory(GiB)": 112.53, "reward": 1.8854166865348816, "reward_std": 0.1293872930109501, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.2726287692785263, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2155, "train_speed(iter/s)": 0.028952 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 243.25, "completions/mean_length": 120.45833587646484, "completions/min_length": 63.75, "epoch": 3.2129560685033507, "grad_norm": 0.9614811436084568, "kl": 0.28271484375, "learning_rate": 7.7534697699497e-07, "loss": 0.0011816896731033921, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833507180214, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2156, "train_speed(iter/s)": 0.028956 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 238.25, "completions/mean_length": 120.65625190734863, "completions/min_length": 50.75, "epoch": 3.214445271779598, "grad_norm": 0.6819424414584239, "kl": 0.2919921875, "learning_rate": 7.751495425497764e-07, "loss": -0.007763025816529989, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2157, "train_speed(iter/s)": 0.028949 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 244.25, "completions/mean_length": 123.4062557220459, "completions/min_length": 62.5, "epoch": 3.215934475055845, "grad_norm": 1.0176692797042834, "kl": 0.2998046875, "learning_rate": 7.749520465485261e-07, "loss": 0.012799570336937904, "memory(GiB)": 112.53, "reward": 1.5520833432674408, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.5520833358168602, "rewards/CineAccuracyORM/std": 0.3546011596918106, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2158, "train_speed(iter/s)": 0.028944 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 240.75, "completions/mean_length": 125.16666984558105, "completions/min_length": 63.0, "epoch": 3.2174236783320924, "grad_norm": 1.649340074517356, "kl": 0.2890625, "learning_rate": 7.74754489035403e-07, "loss": -0.009053366258740425, "memory(GiB)": 112.53, "reward": 1.729166716337204, "reward_std": 0.09750140458345413, "rewards/CineAccuracyORM/mean": 0.7291666865348816, "rewards/CineAccuracyORM/std": 0.4306785687804222, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2159, "train_speed(iter/s)": 0.028944 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 246.75, "completions/mean_length": 130.05208587646484, "completions/min_length": 63.75, "epoch": 3.2189128816083397, "grad_norm": 1.1883613334102534, "kl": 0.28759765625, "learning_rate": 7.745568700546041e-07, "loss": 0.012478865683078766, "memory(GiB)": 112.53, "reward": 1.5937500298023224, "reward_std": 0.11258216388523579, "rewards/CineAccuracyORM/mean": 0.604166679084301, "rewards/CineAccuracyORM/std": 0.3739768713712692, "rewards/Format/mean": 0.9895833432674408, "rewards/Format/std": 0.05103103443980217, "step": 2160, "train_speed(iter/s)": 0.028943 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 252.0, "completions/mean_length": 115.18750190734863, "completions/min_length": 61.25, "epoch": 3.220402084884587, "grad_norm": 0.5875515517086075, "kl": 0.2998046875, "learning_rate": 7.743591896503405e-07, "loss": -0.012512296438217163, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833507180214, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2161, "train_speed(iter/s)": 0.028943 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 279.25, "completions/mean_length": 127.01042175292969, "completions/min_length": 59.5, "epoch": 3.221891288160834, "grad_norm": 1.1095329264733278, "kl": 0.28173828125, "learning_rate": 7.74161447866837e-07, "loss": -0.010826568119227886, "memory(GiB)": 112.53, "reward": 1.5625000298023224, "reward_std": 0.08625819534063339, "rewards/CineAccuracyORM/mean": 0.5625000074505806, "rewards/CineAccuracyORM/std": 0.23095712065696716, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2162, "train_speed(iter/s)": 0.028946 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 204.5, "completions/mean_length": 114.8750057220459, "completions/min_length": 60.75, "epoch": 3.223380491437081, "grad_norm": 1.249540741903286, "kl": 0.3115234375, "learning_rate": 7.739636447483319e-07, "loss": -0.0061135790310800076, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.08625819534063339, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.25448867678642273, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2163, "train_speed(iter/s)": 0.02895 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 234.75, "completions/mean_length": 123.82291793823242, "completions/min_length": 55.5, "epoch": 3.224869694713328, "grad_norm": 1.2117913883859135, "kl": 0.29248046875, "learning_rate": 7.737657803390776e-07, "loss": 0.024383608251810074, "memory(GiB)": 112.53, "reward": 1.7395833432674408, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2164, "train_speed(iter/s)": 0.028946 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 263.0, "completions/mean_length": 127.97916984558105, "completions/min_length": 54.5, "epoch": 3.2263588979895754, "grad_norm": 0.005144232238093295, "kl": 0.28515625, "learning_rate": 7.735678546833402e-07, "loss": 0.00028479454340413213, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2165, "train_speed(iter/s)": 0.028945 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 233.5, "completions/mean_length": 123.60417175292969, "completions/min_length": 63.0, "epoch": 3.2278481012658227, "grad_norm": 0.8459261115480008, "kl": 0.28564453125, "learning_rate": 7.733698678253991e-07, "loss": 0.009229704737663269, "memory(GiB)": 112.53, "reward": 1.8541666865348816, "reward_std": 0.0725918859243393, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2166, "train_speed(iter/s)": 0.028944 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.25, "completions/mean_length": 126.80208396911621, "completions/min_length": 65.5, "epoch": 3.22933730454207, "grad_norm": 1.8495929411257954, "kl": 0.294921875, "learning_rate": 7.731718198095478e-07, "loss": -0.00013556393969338387, "memory(GiB)": 112.53, "reward": 1.7812500298023224, "reward_std": 0.165540037676692, "rewards/CineAccuracyORM/mean": 0.7812500298023224, "rewards/CineAccuracyORM/std": 0.34822922945022583, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2167, "train_speed(iter/s)": 0.028944 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 241.5, "completions/mean_length": 131.40625381469727, "completions/min_length": 61.0, "epoch": 3.230826507818317, "grad_norm": 0.004659689427950966, "kl": 0.2900390625, "learning_rate": 7.729737106800931e-07, "loss": 0.0002902145788539201, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2168, "train_speed(iter/s)": 0.02894 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 273.5, "completions/mean_length": 130.7083339691162, "completions/min_length": 57.0, "epoch": 3.2323157110945644, "grad_norm": 0.6061645585820394, "kl": 0.273681640625, "learning_rate": 7.727755404813558e-07, "loss": -0.011745359748601913, "memory(GiB)": 112.53, "reward": 1.572916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.572916679084301, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2169, "train_speed(iter/s)": 0.028939 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 276.75, "completions/mean_length": 117.34375762939453, "completions/min_length": 49.75, "epoch": 3.2338049143708116, "grad_norm": 0.005517192632143556, "kl": 0.2998046875, "learning_rate": 7.725773092576703e-07, "loss": 0.00029960001120343804, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2170, "train_speed(iter/s)": 0.028935 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.5, "completions/mean_length": 118.88542175292969, "completions/min_length": 60.75, "epoch": 3.235294117647059, "grad_norm": 0.006241449679558419, "kl": 0.29443359375, "learning_rate": 7.723790170533846e-07, "loss": 0.00029475503833964467, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2171, "train_speed(iter/s)": 0.028934 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.75, "completions/mean_length": 122.03125381469727, "completions/min_length": 55.0, "epoch": 3.236783320923306, "grad_norm": 0.8383985245051462, "kl": 0.2919921875, "learning_rate": 7.721806639128603e-07, "loss": 0.007432376500219107, "memory(GiB)": 112.53, "reward": 1.7187500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500074505806, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2172, "train_speed(iter/s)": 0.028927 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 243.5, "completions/mean_length": 130.47916984558105, "completions/min_length": 67.25, "epoch": 3.2382725241995534, "grad_norm": 0.7358370463536605, "kl": 0.29296875, "learning_rate": 7.719822498804727e-07, "loss": 0.00405162712559104, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2173, "train_speed(iter/s)": 0.02892 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 248.75, "completions/mean_length": 126.17708587646484, "completions/min_length": 50.75, "epoch": 3.2397617274758006, "grad_norm": 1.2179123750932257, "kl": 0.28564453125, "learning_rate": 7.717837750006106e-07, "loss": -0.0024563362821936607, "memory(GiB)": 112.53, "reward": 1.5208333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.5208333432674408, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2174, "train_speed(iter/s)": 0.028923 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 231.0, "completions/mean_length": 120.89583587646484, "completions/min_length": 63.5, "epoch": 3.241250930752048, "grad_norm": 0.6723260915187724, "kl": 0.28466796875, "learning_rate": 7.715852393176766e-07, "loss": 4.6448920329567045e-05, "memory(GiB)": 112.53, "reward": 1.7083333730697632, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7083333432674408, "rewards/CineAccuracyORM/std": 0.36846019327640533, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2175, "train_speed(iter/s)": 0.028923 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 253.0, "completions/mean_length": 124.48958587646484, "completions/min_length": 56.5, "epoch": 3.2427401340282946, "grad_norm": 0.6233061761887632, "kl": 0.2861328125, "learning_rate": 7.713866428760869e-07, "loss": -0.00038747210055589676, "memory(GiB)": 112.53, "reward": 1.5937500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500149011612, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2176, "train_speed(iter/s)": 0.028926 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.75, "completions/mean_length": 124.80208778381348, "completions/min_length": 57.25, "epoch": 3.244229337304542, "grad_norm": 0.005910748687197932, "kl": 0.28369140625, "learning_rate": 7.71187985720271e-07, "loss": 0.00028365838807076216, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2177, "train_speed(iter/s)": 0.028926 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 247.5, "completions/mean_length": 121.30208396911621, "completions/min_length": 51.0, "epoch": 3.245718540580789, "grad_norm": 0.683666015909661, "kl": 0.3017578125, "learning_rate": 7.709892678946723e-07, "loss": -0.004954935051500797, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2178, "train_speed(iter/s)": 0.028922 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 262.75, "completions/mean_length": 119.77083587646484, "completions/min_length": 62.0, "epoch": 3.2472077438570364, "grad_norm": 0.0048563668959544564, "kl": 0.29296875, "learning_rate": 7.707904894437478e-07, "loss": 0.00029343401547521353, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2179, "train_speed(iter/s)": 0.028918 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 360.75, "completions/mean_length": 129.47916793823242, "completions/min_length": 52.75, "epoch": 3.2486969471332836, "grad_norm": 0.5959677770670124, "kl": 0.2802734375, "learning_rate": 7.705916504119678e-07, "loss": 0.0007780602900311351, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2180, "train_speed(iter/s)": 0.028918 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 312.5, "completions/mean_length": 124.79167366027832, "completions/min_length": 50.75, "epoch": 3.250186150409531, "grad_norm": 0.6994034775551762, "kl": 0.28125, "learning_rate": 7.703927508438162e-07, "loss": -0.015375478193163872, "memory(GiB)": 112.53, "reward": 1.6458334028720856, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6458333507180214, "rewards/CineAccuracyORM/std": 0.43174003064632416, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2181, "train_speed(iter/s)": 0.028917 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 338.5, "completions/mean_length": 140.42708587646484, "completions/min_length": 63.5, "epoch": 3.251675353685778, "grad_norm": 0.9762451585568652, "kl": 0.27197265625, "learning_rate": 7.701937907837906e-07, "loss": -0.004294486250728369, "memory(GiB)": 112.53, "reward": 1.8750000298023224, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.8750000149011612, "rewards/CineAccuracyORM/std": 0.21978919208049774, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2182, "train_speed(iter/s)": 0.028915 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 248.0, "completions/mean_length": 116.22916984558105, "completions/min_length": 49.0, "epoch": 3.2531645569620253, "grad_norm": 0.004830826041660161, "kl": 0.29443359375, "learning_rate": 7.69994770276402e-07, "loss": 0.00029439516947604716, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2183, "train_speed(iter/s)": 0.028916 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 246.5, "completions/mean_length": 123.94791984558105, "completions/min_length": 59.5, "epoch": 3.2546537602382726, "grad_norm": 1.0302560167492942, "kl": 0.29345703125, "learning_rate": 7.697956893661753e-07, "loss": -0.009324808605015278, "memory(GiB)": 112.53, "reward": 1.697916716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.697916679084301, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2184, "train_speed(iter/s)": 0.028915 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.0, "completions/mean_length": 113.47916984558105, "completions/min_length": 53.0, "epoch": 3.25614296351452, "grad_norm": 0.7323622423190127, "kl": 0.30859375, "learning_rate": 7.695965480976481e-07, "loss": -0.00045589866931550205, "memory(GiB)": 112.53, "reward": 1.5520833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.5520833432674408, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2185, "train_speed(iter/s)": 0.028912 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 236.75, "completions/mean_length": 112.83333587646484, "completions/min_length": 58.75, "epoch": 3.257632166790767, "grad_norm": 1.3957153242499463, "kl": 0.3212890625, "learning_rate": 7.693973465153722e-07, "loss": -0.0036548625212162733, "memory(GiB)": 112.53, "reward": 1.6041666865348816, "reward_std": 0.1497435588389635, "rewards/CineAccuracyORM/mean": 0.604166679084301, "rewards/CineAccuracyORM/std": 0.48736217617988586, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2186, "train_speed(iter/s)": 0.028912 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.25, "completions/mean_length": 114.45833396911621, "completions/min_length": 46.25, "epoch": 3.2591213700670143, "grad_norm": 1.668034808819067, "kl": 0.3154296875, "learning_rate": 7.691980846639128e-07, "loss": 0.012218808755278587, "memory(GiB)": 112.53, "reward": 1.697916716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.697916679084301, "rewards/CineAccuracyORM/std": 0.34448449313640594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2187, "train_speed(iter/s)": 0.028911 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.25, "completions/mean_length": 129.8541717529297, "completions/min_length": 57.25, "epoch": 3.2606105733432615, "grad_norm": 1.9573687414062173, "kl": 0.29052734375, "learning_rate": 7.689987625878483e-07, "loss": -0.022420767694711685, "memory(GiB)": 112.53, "reward": 1.9375000298023224, "reward_std": 0.1034691073000431, "rewards/CineAccuracyORM/mean": 0.9375000149011612, "rewards/CineAccuracyORM/std": 0.16575583815574646, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2188, "train_speed(iter/s)": 0.028904 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 258.25, "completions/mean_length": 130.01041984558105, "completions/min_length": 54.0, "epoch": 3.2620997766195083, "grad_norm": 0.005429056701227, "kl": 0.2744140625, "learning_rate": 7.68799380331771e-07, "loss": 0.00027432688511908054, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2189, "train_speed(iter/s)": 0.028903 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 288.75, "completions/mean_length": 124.33333778381348, "completions/min_length": 49.25, "epoch": 3.2635889798957556, "grad_norm": 0.6776730362610773, "kl": 0.2802734375, "learning_rate": 7.685999379402861e-07, "loss": -0.0032172014471143484, "memory(GiB)": 112.53, "reward": 1.8125000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2190, "train_speed(iter/s)": 0.028906 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.0, "completions/mean_length": 108.03125381469727, "completions/min_length": 50.75, "epoch": 3.265078183172003, "grad_norm": 1.3528703645148843, "kl": 0.3212890625, "learning_rate": 7.684004354580126e-07, "loss": -0.006101831793785095, "memory(GiB)": 112.53, "reward": 1.7083333730697632, "reward_std": 0.07715167850255966, "rewards/CineAccuracyORM/mean": 0.7083333432674408, "rewards/CineAccuracyORM/std": 0.4619346410036087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2191, "train_speed(iter/s)": 0.028906 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 270.5, "completions/mean_length": 114.23958587646484, "completions/min_length": 61.75, "epoch": 3.26656738644825, "grad_norm": 0.793510537526877, "kl": 0.29248046875, "learning_rate": 7.682008729295833e-07, "loss": -0.006542135961353779, "memory(GiB)": 112.53, "reward": 1.8125000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000074505806, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2192, "train_speed(iter/s)": 0.028905 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 236.5, "completions/mean_length": 120.44791984558105, "completions/min_length": 68.0, "epoch": 3.2680565897244973, "grad_norm": 0.8860019927358663, "kl": 0.296875, "learning_rate": 7.680012503996436e-07, "loss": 0.010569370351731777, "memory(GiB)": 112.53, "reward": 1.6041666865348816, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.6041666716337204, "rewards/CineAccuracyORM/std": 0.3676535338163376, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2193, "train_speed(iter/s)": 0.028899 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 297.75, "completions/mean_length": 127.18750381469727, "completions/min_length": 72.5, "epoch": 3.2695457930007445, "grad_norm": 1.3008150660886992, "kl": 0.291015625, "learning_rate": 7.678015679128529e-07, "loss": -0.018491629511117935, "memory(GiB)": 112.53, "reward": 1.5937500596046448, "reward_std": 0.10661446675658226, "rewards/CineAccuracyORM/mean": 0.5937500149011612, "rewards/CineAccuracyORM/std": 0.43859851360321045, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2194, "train_speed(iter/s)": 0.028899 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 300.25, "completions/mean_length": 122.58333969116211, "completions/min_length": 53.25, "epoch": 3.271034996276992, "grad_norm": 1.0660460123110675, "kl": 0.28955078125, "learning_rate": 7.67601825513884e-07, "loss": -0.017473220825195312, "memory(GiB)": 112.53, "reward": 1.7083333730697632, "reward_std": 0.07715167850255966, "rewards/CineAccuracyORM/mean": 0.7083333432674408, "rewards/CineAccuracyORM/std": 0.3568698540329933, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2195, "train_speed(iter/s)": 0.028893 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 276.25, "completions/mean_length": 119.93750381469727, "completions/min_length": 52.5, "epoch": 3.272524199553239, "grad_norm": 1.5796151325273355, "kl": 0.32275390625, "learning_rate": 7.674020232474228e-07, "loss": 0.024679090827703476, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.1315174512565136, "rewards/CineAccuracyORM/mean": 0.583333358168602, "rewards/CineAccuracyORM/std": 0.48599863797426224, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2196, "train_speed(iter/s)": 0.028889 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 268.0, "completions/mean_length": 127.92708587646484, "completions/min_length": 65.5, "epoch": 3.2740134028294863, "grad_norm": 1.0336937158864405, "kl": 0.28369140625, "learning_rate": 7.672021611581687e-07, "loss": 0.002851539757102728, "memory(GiB)": 112.53, "reward": 1.854166716337204, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.8541666865348816, "rewards/CineAccuracyORM/std": 0.27512967213988304, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2197, "train_speed(iter/s)": 0.028887 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.5, "completions/mean_length": 121.38541984558105, "completions/min_length": 56.25, "epoch": 3.2755026061057335, "grad_norm": 0.6068908729869072, "kl": 0.31396484375, "learning_rate": 7.670022392908349e-07, "loss": -0.0024946548510342836, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2198, "train_speed(iter/s)": 0.028889 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 255.5, "completions/mean_length": 120.35416793823242, "completions/min_length": 44.25, "epoch": 3.2769918093819808, "grad_norm": 0.0057287515453343565, "kl": 0.30078125, "learning_rate": 7.668022576901471e-07, "loss": 0.0003002019366249442, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2199, "train_speed(iter/s)": 0.028885 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 238.0, "completions/mean_length": 127.40625381469727, "completions/min_length": 63.5, "epoch": 3.278481012658228, "grad_norm": 0.004232664660270243, "kl": 0.27490234375, "learning_rate": 7.666022164008456e-07, "loss": 0.00027524877805262804, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2200, "train_speed(iter/s)": 0.028888 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 279.5, "completions/mean_length": 140.46875190734863, "completions/min_length": 64.75, "epoch": 3.2799702159344752, "grad_norm": 0.9419563173524298, "kl": 0.27099609375, "learning_rate": 7.664021154676827e-07, "loss": 0.029710112139582634, "memory(GiB)": 112.53, "reward": 1.9791666865348816, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.9791666716337204, "rewards/CineAccuracyORM/std": 0.07058246433734894, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2201, "train_speed(iter/s)": 0.028891 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 199.5, "completions/mean_length": 112.77083587646484, "completions/min_length": 59.75, "epoch": 3.281459419210722, "grad_norm": 0.9185082622113161, "kl": 0.3037109375, "learning_rate": 7.662019549354252e-07, "loss": -0.0022730191703885794, "memory(GiB)": 112.53, "reward": 1.9479166865348816, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.9479166716337204, "rewards/CineAccuracyORM/std": 0.1550404578447342, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2202, "train_speed(iter/s)": 0.028895 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.75, "completions/mean_length": 112.33333587646484, "completions/min_length": 44.75, "epoch": 3.2829486224869693, "grad_norm": 0.7385810186530921, "kl": 0.306640625, "learning_rate": 7.660017348488525e-07, "loss": 0.0038380646146833897, "memory(GiB)": 112.53, "reward": 1.8125000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2203, "train_speed(iter/s)": 0.028892 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 260.75, "completions/mean_length": 127.7500057220459, "completions/min_length": 58.25, "epoch": 3.2844378257632165, "grad_norm": 0.7847912559639421, "kl": 0.281494140625, "learning_rate": 7.658014552527572e-07, "loss": 0.028155434876680374, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2204, "train_speed(iter/s)": 0.028891 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.25, "completions/mean_length": 119.26041984558105, "completions/min_length": 59.0, "epoch": 3.2859270290394638, "grad_norm": 0.5660588143267213, "kl": 0.3056640625, "learning_rate": 7.656011161919462e-07, "loss": -0.005871846340596676, "memory(GiB)": 112.53, "reward": 1.7187500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500074505806, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2205, "train_speed(iter/s)": 0.028895 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 316.5, "completions/mean_length": 137.37500190734863, "completions/min_length": 65.75, "epoch": 3.287416232315711, "grad_norm": 0.004328240325059928, "kl": 0.26123046875, "learning_rate": 7.654007177112385e-07, "loss": 0.00026050483575090766, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2206, "train_speed(iter/s)": 0.028886 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 266.75, "completions/mean_length": 128.35416793823242, "completions/min_length": 51.0, "epoch": 3.2889054355919582, "grad_norm": 0.8476671167277362, "kl": 0.2666015625, "learning_rate": 7.652002598554675e-07, "loss": -0.012410135939717293, "memory(GiB)": 112.53, "reward": 1.6145833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6145833432674408, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2207, "train_speed(iter/s)": 0.028882 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 268.5, "completions/mean_length": 119.06250190734863, "completions/min_length": 56.0, "epoch": 3.2903946388682055, "grad_norm": 0.7452178457171518, "kl": 0.289794921875, "learning_rate": 7.64999742669479e-07, "loss": -0.009674746543169022, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2208, "train_speed(iter/s)": 0.028882 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 240.0, "completions/mean_length": 129.60416793823242, "completions/min_length": 58.5, "epoch": 3.2918838421444527, "grad_norm": 0.004440883260620751, "kl": 0.267822265625, "learning_rate": 7.647991661981322e-07, "loss": 0.00026783125940710306, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2209, "train_speed(iter/s)": 0.028885 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 237.25, "completions/mean_length": 125.44792366027832, "completions/min_length": 56.0, "epoch": 3.2933730454207, "grad_norm": 0.7193447772647091, "kl": 0.27734375, "learning_rate": 7.645985304863003e-07, "loss": 0.006530556827783585, "memory(GiB)": 112.53, "reward": 1.8541666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2210, "train_speed(iter/s)": 0.028882 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 296.75, "completions/mean_length": 130.41666793823242, "completions/min_length": 58.5, "epoch": 3.294862248696947, "grad_norm": 0.004257973260776344, "kl": 0.27587890625, "learning_rate": 7.643978355788688e-07, "loss": 0.00027591324760578573, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2211, "train_speed(iter/s)": 0.028878 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 290.0, "completions/mean_length": 135.22916793823242, "completions/min_length": 73.25, "epoch": 3.2963514519731945, "grad_norm": 0.8978252610938378, "kl": 0.32861328125, "learning_rate": 7.641970815207372e-07, "loss": 0.020871391519904137, "memory(GiB)": 112.53, "reward": 1.479166716337204, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.4791666865348816, "rewards/CineAccuracyORM/std": 0.4605609029531479, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2212, "train_speed(iter/s)": 0.028877 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 253.0, "completions/mean_length": 121.92708587646484, "completions/min_length": 54.5, "epoch": 3.2978406552494417, "grad_norm": 0.004595816075815962, "kl": 0.293212890625, "learning_rate": 7.639962683568177e-07, "loss": 0.00029356934828683734, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2213, "train_speed(iter/s)": 0.028876 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 240.75, "completions/mean_length": 121.86458587646484, "completions/min_length": 57.5, "epoch": 3.299329858525689, "grad_norm": 0.005020417756496675, "kl": 0.28369140625, "learning_rate": 7.63795396132036e-07, "loss": 0.0002833602484315634, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2214, "train_speed(iter/s)": 0.028879 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 342.75, "completions/mean_length": 136.52083778381348, "completions/min_length": 65.5, "epoch": 3.3008190618019357, "grad_norm": 1.251396711658826, "kl": 0.275390625, "learning_rate": 7.635944648913309e-07, "loss": -0.02079792320728302, "memory(GiB)": 112.53, "reward": 1.6979166865348816, "reward_std": 0.12624847888946533, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.3380490094423294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2215, "train_speed(iter/s)": 0.028869 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 314.0, "completions/mean_length": 137.6041717529297, "completions/min_length": 71.25, "epoch": 3.302308265078183, "grad_norm": 0.003758334475800203, "kl": 0.29150390625, "learning_rate": 7.633934746796544e-07, "loss": 0.0002913532080128789, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2216, "train_speed(iter/s)": 0.028864 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 354.5, "completions/mean_length": 146.31250381469727, "completions/min_length": 60.0, "epoch": 3.3037974683544302, "grad_norm": 0.5578433688081269, "kl": 0.260986328125, "learning_rate": 7.631924255419719e-07, "loss": -0.002530329395085573, "memory(GiB)": 112.53, "reward": 1.7812500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7812500074505806, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2217, "train_speed(iter/s)": 0.028862 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.25, "completions/mean_length": 114.20833587646484, "completions/min_length": 57.0, "epoch": 3.3052866716306775, "grad_norm": 0.6086678478403444, "kl": 0.29638671875, "learning_rate": 7.629913175232618e-07, "loss": -0.0026567280292510986, "memory(GiB)": 112.53, "reward": 1.9375, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.11058146506547928, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2218, "train_speed(iter/s)": 0.028859 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 305.25, "completions/mean_length": 131.39583587646484, "completions/min_length": 56.75, "epoch": 3.3067758749069247, "grad_norm": 1.2135158487257645, "kl": 0.2783203125, "learning_rate": 7.627901506685156e-07, "loss": 0.0025872220285236835, "memory(GiB)": 112.53, "reward": 1.697916716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.697916679084301, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2219, "train_speed(iter/s)": 0.028858 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 329.75, "completions/mean_length": 137.0729217529297, "completions/min_length": 60.75, "epoch": 3.308265078183172, "grad_norm": 1.1520488978342098, "kl": 0.2529296875, "learning_rate": 7.62588925022738e-07, "loss": 0.004088897258043289, "memory(GiB)": 112.53, "reward": 1.875, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.1276884824037552, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2220, "train_speed(iter/s)": 0.028857 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 246.75, "completions/mean_length": 131.10416984558105, "completions/min_length": 60.75, "epoch": 3.309754281459419, "grad_norm": 0.8514153424856535, "kl": 0.2890625, "learning_rate": 7.623876406309471e-07, "loss": -0.023386485874652863, "memory(GiB)": 112.53, "reward": 1.4687500596046448, "reward_std": 0.07400632463395596, "rewards/CineAccuracyORM/mean": 0.4687500186264515, "rewards/CineAccuracyORM/std": 0.4520214945077896, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2221, "train_speed(iter/s)": 0.028853 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 256.0, "completions/mean_length": 119.53125190734863, "completions/min_length": 57.75, "epoch": 3.3112434847356664, "grad_norm": 0.8179773941378128, "kl": 0.30078125, "learning_rate": 7.621862975381738e-07, "loss": -0.006965934298932552, "memory(GiB)": 112.53, "reward": 1.9479166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.9479166716337204, "rewards/CineAccuracyORM/std": 0.1037127822637558, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2222, "train_speed(iter/s)": 0.028856 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 234.75, "completions/mean_length": 128.98958587646484, "completions/min_length": 55.0, "epoch": 3.3127326880119137, "grad_norm": 0.5365403926466115, "kl": 0.2919921875, "learning_rate": 7.619848957894623e-07, "loss": 0.004312790930271149, "memory(GiB)": 112.53, "reward": 1.697916716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.697916679084301, "rewards/CineAccuracyORM/std": 0.34448449313640594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2223, "train_speed(iter/s)": 0.028856 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 315.5, "completions/mean_length": 137.71875381469727, "completions/min_length": 69.5, "epoch": 3.314221891288161, "grad_norm": 0.5320238490048832, "kl": 0.2724609375, "learning_rate": 7.617834354298699e-07, "loss": -0.006383850239217281, "memory(GiB)": 112.53, "reward": 1.7187500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500149011612, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2224, "train_speed(iter/s)": 0.028854 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 280.5, "completions/mean_length": 127.76042175292969, "completions/min_length": 65.25, "epoch": 3.315711094564408, "grad_norm": 0.5384736437629238, "kl": 0.29736328125, "learning_rate": 7.61581916504467e-07, "loss": -0.0024025309830904007, "memory(GiB)": 112.53, "reward": 1.7083333730697632, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7083333432674408, "rewards/CineAccuracyORM/std": 0.36846019327640533, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2225, "train_speed(iter/s)": 0.028847 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 289.75, "completions/mean_length": 133.08333778381348, "completions/min_length": 56.25, "epoch": 3.3172002978406554, "grad_norm": 1.8556084642313133, "kl": 0.27978515625, "learning_rate": 7.613803390583373e-07, "loss": 0.0056696245446801186, "memory(GiB)": 112.53, "reward": 1.7083333730697632, "reward_std": 0.07715167850255966, "rewards/CineAccuracyORM/mean": 0.7083333432674408, "rewards/CineAccuracyORM/std": 0.3568698540329933, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2226, "train_speed(iter/s)": 0.028842 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.5, "completions/mean_length": 124.71875190734863, "completions/min_length": 58.25, "epoch": 3.3186895011169026, "grad_norm": 0.004142036833494495, "kl": 0.29443359375, "learning_rate": 7.611787031365774e-07, "loss": 0.00029444319079630077, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2227, "train_speed(iter/s)": 0.028845 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 246.25, "completions/mean_length": 123.89583778381348, "completions/min_length": 57.75, "epoch": 3.3201787043931494, "grad_norm": 0.8245790574868117, "kl": 0.2890625, "learning_rate": 7.609770087842968e-07, "loss": 0.012357095256447792, "memory(GiB)": 112.53, "reward": 1.5520833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.5520833432674408, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2228, "train_speed(iter/s)": 0.028844 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 322.25, "completions/mean_length": 135.62500381469727, "completions/min_length": 63.0, "epoch": 3.3216679076693967, "grad_norm": 0.0044696882009379185, "kl": 0.294921875, "learning_rate": 7.607752560466183e-07, "loss": 0.0002948158362414688, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2229, "train_speed(iter/s)": 0.028836 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 269.25, "completions/mean_length": 132.21875381469727, "completions/min_length": 59.75, "epoch": 3.323157110945644, "grad_norm": 1.07560781357667, "kl": 0.2919921875, "learning_rate": 7.605734449686777e-07, "loss": 0.010451888665556908, "memory(GiB)": 112.53, "reward": 1.3437500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.3437500102445483, "rewards/CineAccuracyORM/std": 0.4121886007487774, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2230, "train_speed(iter/s)": 0.028825 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 295.0, "completions/mean_length": 139.3958396911621, "completions/min_length": 58.5, "epoch": 3.324646314221891, "grad_norm": 1.1970764266269114, "kl": 0.293701171875, "learning_rate": 7.603715755956241e-07, "loss": 0.00847562961280346, "memory(GiB)": 112.53, "reward": 1.7083333730697632, "reward_std": 0.1588500775396824, "rewards/CineAccuracyORM/mean": 0.7083333432674408, "rewards/CineAccuracyORM/std": 0.44886354357004166, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2231, "train_speed(iter/s)": 0.028817 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.25, "completions/mean_length": 116.43750381469727, "completions/min_length": 60.5, "epoch": 3.3261355174981384, "grad_norm": 0.004040343370735235, "kl": 0.2978515625, "learning_rate": 7.601696479726194e-07, "loss": 0.00029774580616503954, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2232, "train_speed(iter/s)": 0.028813 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 247.0, "completions/mean_length": 136.0416717529297, "completions/min_length": 63.0, "epoch": 3.3276247207743856, "grad_norm": 0.009183966136639571, "kl": 0.29443359375, "learning_rate": 7.599676621448383e-07, "loss": 0.0002942036953754723, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2233, "train_speed(iter/s)": 0.028809 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.25, "completions/mean_length": 130.14583778381348, "completions/min_length": 62.25, "epoch": 3.329113924050633, "grad_norm": 0.004034105807924492, "kl": 0.302734375, "learning_rate": 7.59765618157469e-07, "loss": 0.0003020272415596992, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2234, "train_speed(iter/s)": 0.028806 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 295.0, "completions/mean_length": 133.6354217529297, "completions/min_length": 58.25, "epoch": 3.33060312732688, "grad_norm": 0.8327064130467912, "kl": 0.2919921875, "learning_rate": 7.595635160557122e-07, "loss": 0.014032772742211819, "memory(GiB)": 112.53, "reward": 1.7083333730697632, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7083333507180214, "rewards/CineAccuracyORM/std": 0.33594508469104767, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2235, "train_speed(iter/s)": 0.028802 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 208.75, "completions/mean_length": 123.46875190734863, "completions/min_length": 59.25, "epoch": 3.3320923306031274, "grad_norm": 0.004099084389015878, "kl": 0.28857421875, "learning_rate": 7.593613558847822e-07, "loss": 0.0002888422168325633, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2236, "train_speed(iter/s)": 0.028799 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 348.5, "completions/mean_length": 129.5833396911621, "completions/min_length": 59.5, "epoch": 3.3335815338793746, "grad_norm": 1.4718481430283468, "kl": 0.2802734375, "learning_rate": 7.59159137689906e-07, "loss": -0.01698710210621357, "memory(GiB)": 112.53, "reward": 1.510416716337204, "reward_std": 0.1588566154241562, "rewards/CineAccuracyORM/mean": 0.5104166865348816, "rewards/CineAccuracyORM/std": 0.43965786695480347, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2237, "train_speed(iter/s)": 0.028797 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 261.75, "completions/mean_length": 120.86458587646484, "completions/min_length": 57.25, "epoch": 3.335070737155622, "grad_norm": 0.00410273105112315, "kl": 0.3095703125, "learning_rate": 7.589568615163233e-07, "loss": 0.0003096057625953108, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2238, "train_speed(iter/s)": 0.0288 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.75, "completions/mean_length": 121.95833969116211, "completions/min_length": 61.75, "epoch": 3.336559940431869, "grad_norm": 0.0038786227223928343, "kl": 0.29052734375, "learning_rate": 7.587545274092873e-07, "loss": 0.00029058969812467694, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2239, "train_speed(iter/s)": 0.028792 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 251.75, "completions/mean_length": 130.35416793823242, "completions/min_length": 57.0, "epoch": 3.3380491437081163, "grad_norm": 0.7501710801421301, "kl": 0.2763671875, "learning_rate": 7.585521354140637e-07, "loss": -0.017109137028455734, "memory(GiB)": 112.53, "reward": 1.4479166865348816, "reward_std": 0.07400632463395596, "rewards/CineAccuracyORM/mean": 0.4479166716337204, "rewards/CineAccuracyORM/std": 0.3717081770300865, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2240, "train_speed(iter/s)": 0.028788 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.0, "completions/mean_length": 117.73958587646484, "completions/min_length": 57.75, "epoch": 3.339538346984363, "grad_norm": 0.0033864335834884866, "kl": 0.279052734375, "learning_rate": 7.583496855759315e-07, "loss": 0.0002791321312543005, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2241, "train_speed(iter/s)": 0.028792 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 283.75, "completions/mean_length": 130.09375381469727, "completions/min_length": 59.75, "epoch": 3.3410275502606104, "grad_norm": 0.8888647029612714, "kl": 0.26953125, "learning_rate": 7.581471779401822e-07, "loss": 0.014931892976164818, "memory(GiB)": 112.53, "reward": 1.8020833432674408, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.23782580345869064, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2242, "train_speed(iter/s)": 0.028786 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.75, "completions/mean_length": 114.98958396911621, "completions/min_length": 55.75, "epoch": 3.3425167535368576, "grad_norm": 0.004003900681724859, "kl": 0.3095703125, "learning_rate": 7.57944612552121e-07, "loss": 0.0003099401365034282, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2243, "train_speed(iter/s)": 0.028788 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 272.75, "completions/mean_length": 121.81250190734863, "completions/min_length": 65.0, "epoch": 3.344005956813105, "grad_norm": 0.965033803000166, "kl": 0.3046875, "learning_rate": 7.577419894570649e-07, "loss": 0.015795107930898666, "memory(GiB)": 112.53, "reward": 1.7812500596046448, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.7812500149011612, "rewards/CineAccuracyORM/std": 0.34448449313640594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2244, "train_speed(iter/s)": 0.02878 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.25, "completions/mean_length": 114.32291984558105, "completions/min_length": 65.25, "epoch": 3.345495160089352, "grad_norm": 0.004562242033967875, "kl": 0.30224609375, "learning_rate": 7.57539308700345e-07, "loss": 0.00030203734058886766, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2245, "train_speed(iter/s)": 0.02878 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 244.0, "completions/mean_length": 124.91666984558105, "completions/min_length": 53.0, "epoch": 3.3469843633655993, "grad_norm": 0.003847194526437026, "kl": 0.27197265625, "learning_rate": 7.573365703273045e-07, "loss": 0.0002721174096222967, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2246, "train_speed(iter/s)": 0.02878 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.5, "completions/mean_length": 108.41666793823242, "completions/min_length": 53.5, "epoch": 3.3484735666418466, "grad_norm": 1.2142423756380274, "kl": 0.32373046875, "learning_rate": 7.571337743832997e-07, "loss": 0.011415685527026653, "memory(GiB)": 112.53, "reward": 1.7812500298023224, "reward_std": 0.102054663002491, "rewards/CineAccuracyORM/mean": 0.7812500149011612, "rewards/CineAccuracyORM/std": 0.3019092120230198, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2247, "train_speed(iter/s)": 0.02878 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 315.25, "completions/mean_length": 129.29167366027832, "completions/min_length": 49.5, "epoch": 3.349962769918094, "grad_norm": 1.046644440854961, "kl": 0.2626953125, "learning_rate": 7.569309209137e-07, "loss": 0.012304048985242844, "memory(GiB)": 112.53, "reward": 1.9270833432674408, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.9270833432674408, "rewards/CineAccuracyORM/std": 0.16161249950528145, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2248, "train_speed(iter/s)": 0.028775 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 246.0, "completions/mean_length": 115.72916984558105, "completions/min_length": 41.75, "epoch": 3.351451973194341, "grad_norm": 0.6478691986504699, "kl": 0.31591796875, "learning_rate": 7.567280099638873e-07, "loss": -0.003278914373368025, "memory(GiB)": 112.53, "reward": 1.8645833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8645833432674408, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2249, "train_speed(iter/s)": 0.028771 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 238.0, "completions/mean_length": 122.44792175292969, "completions/min_length": 60.0, "epoch": 3.3529411764705883, "grad_norm": 1.1628211788546712, "kl": 0.2861328125, "learning_rate": 7.565250415792566e-07, "loss": 0.004717519041150808, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.6770833507180214, "rewards/CineAccuracyORM/std": 0.4023842103779316, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2250, "train_speed(iter/s)": 0.028771 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 241.25, "completions/mean_length": 126.84375190734863, "completions/min_length": 52.0, "epoch": 3.3544303797468356, "grad_norm": 0.0034504868130379525, "kl": 0.2900390625, "learning_rate": 7.563220158052157e-07, "loss": 0.00029019956127740443, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2251, "train_speed(iter/s)": 0.02877 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.25, "completions/mean_length": 119.35416793823242, "completions/min_length": 61.5, "epoch": 3.355919583023083, "grad_norm": 0.4825317486569077, "kl": 0.29248046875, "learning_rate": 7.561189326871852e-07, "loss": -0.014415081590414047, "memory(GiB)": 112.53, "reward": 1.635416716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.635416679084301, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2252, "train_speed(iter/s)": 0.028767 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 255.5, "completions/mean_length": 116.25000381469727, "completions/min_length": 51.25, "epoch": 3.35740878629933, "grad_norm": 0.7010247332049485, "kl": 0.39404296875, "learning_rate": 7.559157922705988e-07, "loss": 0.011771872639656067, "memory(GiB)": 112.53, "reward": 1.8958333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2253, "train_speed(iter/s)": 0.028767 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.0, "completions/mean_length": 121.75000381469727, "completions/min_length": 59.75, "epoch": 3.358897989575577, "grad_norm": 0.003911302596215304, "kl": 0.2802734375, "learning_rate": 7.557125946009023e-07, "loss": 0.0002802566159516573, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2254, "train_speed(iter/s)": 0.02877 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 229.0, "completions/mean_length": 117.8750057220459, "completions/min_length": 62.25, "epoch": 3.3603871928518245, "grad_norm": 0.0036211706096194616, "kl": 0.29736328125, "learning_rate": 7.555093397235551e-07, "loss": 0.0002967619802802801, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2255, "train_speed(iter/s)": 0.028767 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 255.5, "completions/mean_length": 125.07291984558105, "completions/min_length": 51.5, "epoch": 3.3618763961280713, "grad_norm": 0.0039520305271880446, "kl": 0.28759765625, "learning_rate": 7.553060276840292e-07, "loss": 0.0002881151740439236, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2256, "train_speed(iter/s)": 0.028763 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 238.5, "completions/mean_length": 126.02083778381348, "completions/min_length": 65.5, "epoch": 3.3633655994043186, "grad_norm": 0.9538527484127489, "kl": 0.2998046875, "learning_rate": 7.551026585278091e-07, "loss": 0.004771902691572905, "memory(GiB)": 112.53, "reward": 1.5937500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500149011612, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2257, "train_speed(iter/s)": 0.028759 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.75, "completions/mean_length": 115.94791793823242, "completions/min_length": 59.75, "epoch": 3.364854802680566, "grad_norm": 1.13432030801907, "kl": 0.3037109375, "learning_rate": 7.548992323003922e-07, "loss": 0.006142579019069672, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.09750141017138958, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.2950507290661335, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2258, "train_speed(iter/s)": 0.028758 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 240.5, "completions/mean_length": 115.96875190734863, "completions/min_length": 53.5, "epoch": 3.366344005956813, "grad_norm": 0.00359839722335458, "kl": 0.2998046875, "learning_rate": 7.546957490472887e-07, "loss": 0.00030005935695953667, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2259, "train_speed(iter/s)": 0.028759 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.5, "completions/mean_length": 115.36458587646484, "completions/min_length": 55.5, "epoch": 3.3678332092330603, "grad_norm": 0.7437260355035589, "kl": 0.29931640625, "learning_rate": 7.544922088140219e-07, "loss": -0.0077240001410245895, "memory(GiB)": 112.53, "reward": 1.729166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7291666865348816, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2260, "train_speed(iter/s)": 0.028759 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 223.25, "completions/mean_length": 113.40625190734863, "completions/min_length": 54.0, "epoch": 3.3693224125093075, "grad_norm": 0.9898755850507289, "kl": 0.2841796875, "learning_rate": 7.542886116461271e-07, "loss": -0.01517687737941742, "memory(GiB)": 112.53, "reward": 1.6875000298023224, "reward_std": 0.08625819534063339, "rewards/CineAccuracyORM/mean": 0.6875000074505806, "rewards/CineAccuracyORM/std": 0.3320881873369217, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2261, "train_speed(iter/s)": 0.028759 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.25, "completions/mean_length": 116.75000190734863, "completions/min_length": 56.5, "epoch": 3.3708116157855548, "grad_norm": 0.004383295322458936, "kl": 0.3154296875, "learning_rate": 7.54084957589153e-07, "loss": 0.0003155809245072305, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2262, "train_speed(iter/s)": 0.028759 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 195.75, "completions/mean_length": 111.92708396911621, "completions/min_length": 59.25, "epoch": 3.372300819061802, "grad_norm": 1.4093985349388622, "kl": 0.31103515625, "learning_rate": 7.538812466886609e-07, "loss": -0.03718190640211105, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.08838834799826145, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2263, "train_speed(iter/s)": 0.028762 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.25, "completions/mean_length": 106.00000190734863, "completions/min_length": 45.0, "epoch": 3.3737900223380493, "grad_norm": 1.1746902740629828, "kl": 0.33740234375, "learning_rate": 7.536774789902245e-07, "loss": -0.022912874817848206, "memory(GiB)": 112.53, "reward": 1.5937500596046448, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.5937500223517418, "rewards/CineAccuracyORM/std": 0.4177052788436413, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2264, "train_speed(iter/s)": 0.028759 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 263.0, "completions/mean_length": 114.18750190734863, "completions/min_length": 52.0, "epoch": 3.3752792256142965, "grad_norm": 0.5971732807240863, "kl": 0.2939453125, "learning_rate": 7.534736545394305e-07, "loss": 0.006738629192113876, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2265, "train_speed(iter/s)": 0.028762 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 281.75, "completions/mean_length": 121.33333778381348, "completions/min_length": 47.25, "epoch": 3.3767684288905437, "grad_norm": 0.5732868288895925, "kl": 0.310546875, "learning_rate": 7.53269773381878e-07, "loss": -0.007858753204345703, "memory(GiB)": 112.53, "reward": 1.510416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.510416679084301, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2266, "train_speed(iter/s)": 0.028758 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.75, "completions/mean_length": 111.42708778381348, "completions/min_length": 58.75, "epoch": 3.3782576321667905, "grad_norm": 0.004073568566764565, "kl": 0.3125, "learning_rate": 7.530658355631794e-07, "loss": 0.00031212385511025786, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2267, "train_speed(iter/s)": 0.028757 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.5, "completions/mean_length": 116.02083396911621, "completions/min_length": 50.5, "epoch": 3.379746835443038, "grad_norm": 0.6986067453032095, "kl": 0.3037109375, "learning_rate": 7.528618411289591e-07, "loss": 0.00798245333135128, "memory(GiB)": 112.53, "reward": 1.9583333432674408, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.9583333432674408, "rewards/CineAccuracyORM/std": 0.09517337381839752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2268, "train_speed(iter/s)": 0.028758 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 225.0, "completions/mean_length": 113.60416984558105, "completions/min_length": 55.25, "epoch": 3.381236038719285, "grad_norm": 0.003343799765386492, "kl": 0.2900390625, "learning_rate": 7.526577901248543e-07, "loss": 0.0002902264823205769, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2269, "train_speed(iter/s)": 0.028755 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 225.75, "completions/mean_length": 108.89583587646484, "completions/min_length": 40.5, "epoch": 3.3827252419955323, "grad_norm": 1.3281969610599282, "kl": 0.318359375, "learning_rate": 7.524536825965153e-07, "loss": 0.021617621183395386, "memory(GiB)": 112.53, "reward": 1.6562500298023224, "reward_std": 0.102054663002491, "rewards/CineAccuracyORM/mean": 0.6562500223517418, "rewards/CineAccuracyORM/std": 0.4369966685771942, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2270, "train_speed(iter/s)": 0.028755 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.75, "completions/mean_length": 112.15625381469727, "completions/min_length": 53.5, "epoch": 3.3842144452717795, "grad_norm": 0.0033905338848676868, "kl": 0.28271484375, "learning_rate": 7.522495185896047e-07, "loss": 0.00028244988061487675, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2271, "train_speed(iter/s)": 0.028752 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.75, "completions/mean_length": 108.41666793823242, "completions/min_length": 53.25, "epoch": 3.3857036485480267, "grad_norm": 0.5514808346990836, "kl": 0.29638671875, "learning_rate": 7.520452981497977e-07, "loss": 0.002188281388953328, "memory(GiB)": 112.53, "reward": 1.6458334028720856, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.645833358168602, "rewards/CineAccuracyORM/std": 0.4870600998401642, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2272, "train_speed(iter/s)": 0.028745 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.5, "completions/mean_length": 112.08333587646484, "completions/min_length": 44.75, "epoch": 3.387192851824274, "grad_norm": 1.0432327321346135, "kl": 0.30810546875, "learning_rate": 7.518410213227821e-07, "loss": -0.006432909052819014, "memory(GiB)": 112.53, "reward": 1.9270833432674408, "reward_std": 0.0876726396381855, "rewards/CineAccuracyORM/mean": 0.9270833432674408, "rewards/CineAccuracyORM/std": 0.17963136732578278, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2273, "train_speed(iter/s)": 0.028745 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 236.75, "completions/mean_length": 106.41666793823242, "completions/min_length": 50.5, "epoch": 3.3886820551005212, "grad_norm": 0.6862276470808835, "kl": 0.3115234375, "learning_rate": 7.516366881542586e-07, "loss": -1.6046969903982244e-05, "memory(GiB)": 112.53, "reward": 1.4270834028720856, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.4270833460614085, "rewards/CineAccuracyORM/std": 0.4121886007487774, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2274, "train_speed(iter/s)": 0.028749 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 254.75, "completions/mean_length": 113.94791793823242, "completions/min_length": 45.25, "epoch": 3.3901712583767685, "grad_norm": 0.0033269473605908283, "kl": 0.30419921875, "learning_rate": 7.514322986899403e-07, "loss": 0.00030397684895433486, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2275, "train_speed(iter/s)": 0.028745 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.5, "completions/mean_length": 115.31250190734863, "completions/min_length": 56.75, "epoch": 3.3916604616530157, "grad_norm": 0.6576355333318572, "kl": 0.29931640625, "learning_rate": 7.512278529755528e-07, "loss": 0.0008744113147258759, "memory(GiB)": 112.53, "reward": 1.8125000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2276, "train_speed(iter/s)": 0.028748 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 208.25, "completions/mean_length": 115.77083587646484, "completions/min_length": 46.5, "epoch": 3.393149664929263, "grad_norm": 1.3667244186478122, "kl": 0.28564453125, "learning_rate": 7.510233510568344e-07, "loss": 0.006216385867446661, "memory(GiB)": 112.53, "reward": 1.6458334028720856, "reward_std": 0.08311938121914864, "rewards/CineAccuracyORM/mean": 0.645833358168602, "rewards/CineAccuracyORM/std": 0.46184761822223663, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2277, "train_speed(iter/s)": 0.028748 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.5, "completions/mean_length": 122.38542175292969, "completions/min_length": 57.25, "epoch": 3.39463886820551, "grad_norm": 0.6310714428848189, "kl": 0.294921875, "learning_rate": 7.508187929795361e-07, "loss": 0.002972155576571822, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833507180214, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2278, "train_speed(iter/s)": 0.028751 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 208.5, "completions/mean_length": 106.10416984558105, "completions/min_length": 51.5, "epoch": 3.3961280714817574, "grad_norm": 0.9541902949665138, "kl": 0.31396484375, "learning_rate": 7.506141787894212e-07, "loss": 0.0012833576183766127, "memory(GiB)": 112.53, "reward": 1.854166716337204, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.8541666865348816, "rewards/CineAccuracyORM/std": 0.27512967213988304, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2279, "train_speed(iter/s)": 0.028745 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.0, "completions/mean_length": 118.6562557220459, "completions/min_length": 55.0, "epoch": 3.3976172747580042, "grad_norm": 0.004087091423976405, "kl": 0.27783203125, "learning_rate": 7.504095085322657e-07, "loss": 0.0002777896006591618, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2280, "train_speed(iter/s)": 0.028748 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.25, "completions/mean_length": 108.03125190734863, "completions/min_length": 54.0, "epoch": 3.399106478034252, "grad_norm": 0.6497540489987929, "kl": 0.314453125, "learning_rate": 7.502047822538584e-07, "loss": 0.011457296088337898, "memory(GiB)": 112.53, "reward": 1.447916716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.447916679084301, "rewards/CineAccuracyORM/std": 0.4884019047021866, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2281, "train_speed(iter/s)": 0.028748 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 259.75, "completions/mean_length": 114.88541793823242, "completions/min_length": 51.25, "epoch": 3.4005956813104987, "grad_norm": 1.0023979717953753, "kl": 0.28466796875, "learning_rate": 7.5e-07, "loss": -0.0004084284300915897, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.11572097800672054, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.4321904703974724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2282, "train_speed(iter/s)": 0.02874 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 302.25, "completions/mean_length": 111.79166984558105, "completions/min_length": 43.25, "epoch": 3.402084884586746, "grad_norm": 0.8838975103150771, "kl": 0.294921875, "learning_rate": 7.497951618165041e-07, "loss": -0.019268516451120377, "memory(GiB)": 112.53, "reward": 1.7187500596046448, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.7187500149011612, "rewards/CineAccuracyORM/std": 0.35000117123126984, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2283, "train_speed(iter/s)": 0.028736 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.75, "completions/mean_length": 114.76041984558105, "completions/min_length": 45.25, "epoch": 3.403574087862993, "grad_norm": 1.258747335228342, "kl": 0.28955078125, "learning_rate": 7.495902677491972e-07, "loss": 0.015248875133693218, "memory(GiB)": 112.53, "reward": 1.6875000596046448, "reward_std": 0.1308017373085022, "rewards/CineAccuracyORM/mean": 0.6875000223517418, "rewards/CineAccuracyORM/std": 0.44651634991168976, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2284, "train_speed(iter/s)": 0.028736 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 201.75, "completions/mean_length": 105.82291984558105, "completions/min_length": 52.5, "epoch": 3.4050632911392404, "grad_norm": 0.003342183541752631, "kl": 0.29833984375, "learning_rate": 7.493853178439175e-07, "loss": 0.0002979194396175444, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2285, "train_speed(iter/s)": 0.028735 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 232.25, "completions/mean_length": 119.36458778381348, "completions/min_length": 54.25, "epoch": 3.4065524944154877, "grad_norm": 1.4511835842606535, "kl": 0.26904296875, "learning_rate": 7.491803121465164e-07, "loss": -0.01130268257111311, "memory(GiB)": 112.53, "reward": 1.5312500596046448, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.5312500204890966, "rewards/CineAccuracyORM/std": 0.42743058502674103, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2286, "train_speed(iter/s)": 0.028738 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 241.0, "completions/mean_length": 110.55208587646484, "completions/min_length": 55.75, "epoch": 3.408041697691735, "grad_norm": 0.0032813715858304015, "kl": 0.29833984375, "learning_rate": 7.489752507028572e-07, "loss": 0.00029796993476338685, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2287, "train_speed(iter/s)": 0.02874 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 181.75, "completions/mean_length": 96.96875381469727, "completions/min_length": 35.75, "epoch": 3.409530900967982, "grad_norm": 0.6377718610281741, "kl": 0.31591796875, "learning_rate": 7.487701335588159e-07, "loss": -0.009501989930868149, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2288, "train_speed(iter/s)": 0.028734 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.75, "completions/mean_length": 110.11458778381348, "completions/min_length": 59.75, "epoch": 3.4110201042442294, "grad_norm": 0.5519553619939467, "kl": 0.30078125, "learning_rate": 7.485649607602814e-07, "loss": 0.004154066555202007, "memory(GiB)": 112.53, "reward": 1.5937500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500149011612, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2289, "train_speed(iter/s)": 0.028734 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 237.5, "completions/mean_length": 110.62500190734863, "completions/min_length": 44.75, "epoch": 3.4125093075204767, "grad_norm": 0.0036450317069549763, "kl": 0.30078125, "learning_rate": 7.483597323531544e-07, "loss": 0.00030124784098006785, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2290, "train_speed(iter/s)": 0.028728 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.5, "completions/mean_length": 105.77083396911621, "completions/min_length": 48.0, "epoch": 3.413998510796724, "grad_norm": 0.9851327504967109, "kl": 0.31982421875, "learning_rate": 7.481544483833484e-07, "loss": 0.0003303165431134403, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.833333358168602, "rewards/CineAccuracyORM/std": 0.2874932996928692, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2291, "train_speed(iter/s)": 0.028732 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.75, "completions/mean_length": 101.38542175292969, "completions/min_length": 37.5, "epoch": 3.415487714072971, "grad_norm": 0.003699655705530088, "kl": 0.3046875, "learning_rate": 7.47949108896789e-07, "loss": 0.00030488110496662557, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2292, "train_speed(iter/s)": 0.028735 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 254.5, "completions/mean_length": 106.04166984558105, "completions/min_length": 43.75, "epoch": 3.416976917349218, "grad_norm": 0.0036837749812070773, "kl": 0.28564453125, "learning_rate": 7.477437139394146e-07, "loss": 0.00028561861836351454, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2293, "train_speed(iter/s)": 0.028727 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 260.5, "completions/mean_length": 101.68750190734863, "completions/min_length": 42.5, "epoch": 3.4184661206254656, "grad_norm": 0.8298698905735487, "kl": 0.3349609375, "learning_rate": 7.47538263557176e-07, "loss": -0.001460524508729577, "memory(GiB)": 112.53, "reward": 1.7812500298023224, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.7812500149011612, "rewards/CineAccuracyORM/std": 0.3470437303185463, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2294, "train_speed(iter/s)": 0.028721 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 225.75, "completions/mean_length": 114.23958778381348, "completions/min_length": 53.75, "epoch": 3.4199553239017124, "grad_norm": 0.5935864481084799, "kl": 0.275390625, "learning_rate": 7.473327577960362e-07, "loss": 0.015757480636239052, "memory(GiB)": 112.53, "reward": 1.7604166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.760416679084301, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2295, "train_speed(iter/s)": 0.028724 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 199.25, "completions/mean_length": 105.12500190734863, "completions/min_length": 52.25, "epoch": 3.4214445271779597, "grad_norm": 0.0035229985882652038, "kl": 0.29833984375, "learning_rate": 7.471271967019706e-07, "loss": 0.00029763946076855063, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2296, "train_speed(iter/s)": 0.028717 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.25, "completions/mean_length": 106.86458396911621, "completions/min_length": 44.0, "epoch": 3.422933730454207, "grad_norm": 0.8434548371796134, "kl": 0.29638671875, "learning_rate": 7.46921580320967e-07, "loss": 0.01646355539560318, "memory(GiB)": 112.53, "reward": 1.7812500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7812500074505806, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2297, "train_speed(iter/s)": 0.028717 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 241.25, "completions/mean_length": 111.34375, "completions/min_length": 43.0, "epoch": 3.424422933730454, "grad_norm": 0.0034351029849304026, "kl": 0.2919921875, "learning_rate": 7.467159086990255e-07, "loss": 0.00029214390087872744, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2298, "train_speed(iter/s)": 0.028717 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 237.25, "completions/mean_length": 105.56250190734863, "completions/min_length": 49.5, "epoch": 3.4259121370067014, "grad_norm": 0.801297723833237, "kl": 0.302734375, "learning_rate": 7.465101818821587e-07, "loss": -0.002667632419615984, "memory(GiB)": 112.53, "reward": 1.7083333730697632, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7083333507180214, "rewards/CineAccuracyORM/std": 0.33594508469104767, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2299, "train_speed(iter/s)": 0.028714 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.25, "completions/mean_length": 108.63542175292969, "completions/min_length": 40.5, "epoch": 3.4274013402829486, "grad_norm": 0.6426130841864068, "kl": 0.2998046875, "learning_rate": 7.463043999163918e-07, "loss": 0.004549182485789061, "memory(GiB)": 112.53, "reward": 1.7708333432674408, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.770833333954215, "rewards/CineAccuracyORM/std": 0.07058246433734894, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2300, "train_speed(iter/s)": 0.028717 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 231.75, "completions/mean_length": 100.29166984558105, "completions/min_length": 35.5, "epoch": 3.428890543559196, "grad_norm": 0.5328858887806333, "kl": 0.3046875, "learning_rate": 7.460985628477618e-07, "loss": -0.014806471765041351, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833507180214, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2301, "train_speed(iter/s)": 0.02872 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 181.25, "completions/mean_length": 100.89583778381348, "completions/min_length": 49.5, "epoch": 3.430379746835443, "grad_norm": 0.002902520573238216, "kl": 0.294921875, "learning_rate": 7.458926707223183e-07, "loss": 0.00029494432965293527, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2302, "train_speed(iter/s)": 0.028721 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.25, "completions/mean_length": 99.58333587646484, "completions/min_length": 41.5, "epoch": 3.4318689501116904, "grad_norm": 1.3737741414317273, "kl": 0.31298828125, "learning_rate": 7.456867235861231e-07, "loss": 0.010757410898804665, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.11116772517561913, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.45247404277324677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2303, "train_speed(iter/s)": 0.028718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.25, "completions/mean_length": 99.05208587646484, "completions/min_length": 30.75, "epoch": 3.4333581533879376, "grad_norm": 0.0034704863329141525, "kl": 0.29345703125, "learning_rate": 7.454807214852505e-07, "loss": 0.0002935868105851114, "memory(GiB)": 112.53, "reward": 1.5000000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000074505806, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2304, "train_speed(iter/s)": 0.028718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.0, "completions/mean_length": 104.77083396911621, "completions/min_length": 41.75, "epoch": 3.434847356664185, "grad_norm": 1.1541749802978218, "kl": 0.30517578125, "learning_rate": 7.45274664465787e-07, "loss": 0.00446919584646821, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.07400632463395596, "rewards/CineAccuracyORM/mean": 0.802083358168602, "rewards/CineAccuracyORM/std": 0.33163563907146454, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2305, "train_speed(iter/s)": 0.028719 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.25, "completions/mean_length": 103.22916793823242, "completions/min_length": 41.0, "epoch": 3.4363365599404316, "grad_norm": 0.004257667068438213, "kl": 0.31298828125, "learning_rate": 7.450685525738314e-07, "loss": 0.0003129348624497652, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2306, "train_speed(iter/s)": 0.028713 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.0, "completions/mean_length": 106.92708587646484, "completions/min_length": 47.75, "epoch": 3.4378257632166793, "grad_norm": 0.004637798445230161, "kl": 0.3056640625, "learning_rate": 7.448623858554947e-07, "loss": 0.00030560867162421346, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2307, "train_speed(iter/s)": 0.028711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 186.5, "completions/mean_length": 96.85416984558105, "completions/min_length": 45.75, "epoch": 3.439314966492926, "grad_norm": 0.7048262760822764, "kl": 0.3115234375, "learning_rate": 7.446561643569001e-07, "loss": -0.007558409124612808, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833507180214, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2308, "train_speed(iter/s)": 0.02871 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 330.75, "completions/mean_length": 110.55208396911621, "completions/min_length": 53.0, "epoch": 3.4408041697691734, "grad_norm": 0.6835820944433286, "kl": 0.28369140625, "learning_rate": 7.444498881241834e-07, "loss": -0.003636349691078067, "memory(GiB)": 112.53, "reward": 1.3958333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.3958333507180214, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2309, "train_speed(iter/s)": 0.028706 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.0, "completions/mean_length": 103.34375381469727, "completions/min_length": 43.5, "epoch": 3.4422933730454206, "grad_norm": 1.0707355912064667, "kl": 0.30859375, "learning_rate": 7.442435572034924e-07, "loss": 0.025682851672172546, "memory(GiB)": 112.53, "reward": 1.5520833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.5520833432674408, "rewards/CineAccuracyORM/std": 0.4456155598163605, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2310, "train_speed(iter/s)": 0.028706 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.5, "completions/mean_length": 115.09375381469727, "completions/min_length": 41.25, "epoch": 3.443782576321668, "grad_norm": 0.003957944086998596, "kl": 0.294921875, "learning_rate": 7.440371716409869e-07, "loss": 0.000294604804366827, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2311, "train_speed(iter/s)": 0.028697 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.75, "completions/mean_length": 101.85416984558105, "completions/min_length": 34.75, "epoch": 3.445271779597915, "grad_norm": 0.0038465725864193227, "kl": 0.29248046875, "learning_rate": 7.438307314828393e-07, "loss": 0.00029293232364580035, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2312, "train_speed(iter/s)": 0.028693 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.5, "completions/mean_length": 106.65625190734863, "completions/min_length": 45.25, "epoch": 3.4467609828741623, "grad_norm": 0.7580543414924782, "kl": 0.2900390625, "learning_rate": 7.436242367752343e-07, "loss": 0.00731274439021945, "memory(GiB)": 112.53, "reward": 1.822916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2313, "train_speed(iter/s)": 0.028692 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.0, "completions/mean_length": 97.25000190734863, "completions/min_length": 41.5, "epoch": 3.4482501861504096, "grad_norm": 1.0408036866522463, "kl": 0.29833984375, "learning_rate": 7.434176875643685e-07, "loss": -0.0034815098624676466, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.08625819534063339, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.3085566312074661, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2314, "train_speed(iter/s)": 0.028695 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 223.5, "completions/mean_length": 105.82292175292969, "completions/min_length": 52.0, "epoch": 3.449739389426657, "grad_norm": 0.003567107658432358, "kl": 0.29638671875, "learning_rate": 7.432110838964507e-07, "loss": 0.00029630062635987997, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2315, "train_speed(iter/s)": 0.028692 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 264.25, "completions/mean_length": 102.14583587646484, "completions/min_length": 46.5, "epoch": 3.451228592702904, "grad_norm": 0.0035178650922975834, "kl": 0.3046875, "learning_rate": 7.43004425817702e-07, "loss": 0.0003048075595870614, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2316, "train_speed(iter/s)": 0.028692 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 204.75, "completions/mean_length": 94.67708587646484, "completions/min_length": 40.5, "epoch": 3.4527177959791513, "grad_norm": 0.0038930648820449548, "kl": 0.31103515625, "learning_rate": 7.427977133743555e-07, "loss": 0.0003103461058344692, "memory(GiB)": 112.53, "reward": 1.2500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.2500000074505806, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2317, "train_speed(iter/s)": 0.028695 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 179.75, "completions/mean_length": 95.78125190734863, "completions/min_length": 44.0, "epoch": 3.4542069992553985, "grad_norm": 0.00370976768816041, "kl": 0.32275390625, "learning_rate": 7.425909466126568e-07, "loss": 0.00032273560645990074, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2318, "train_speed(iter/s)": 0.028693 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.25, "completions/mean_length": 96.04166793823242, "completions/min_length": 42.0, "epoch": 3.4556962025316453, "grad_norm": 0.01662774547548506, "kl": 0.34912109375, "learning_rate": 7.423841255788635e-07, "loss": 0.00034916377626359463, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2319, "train_speed(iter/s)": 0.028692 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 249.75, "completions/mean_length": 102.35416793823242, "completions/min_length": 39.0, "epoch": 3.457185405807893, "grad_norm": 0.9707734782303512, "kl": 0.29736328125, "learning_rate": 7.421772503192452e-07, "loss": -0.0001440246996935457, "memory(GiB)": 112.53, "reward": 1.8958333730697632, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.895833358168602, "rewards/CineAccuracyORM/std": 0.2224479243159294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2320, "train_speed(iter/s)": 0.028695 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.5, "completions/mean_length": 103.375, "completions/min_length": 52.5, "epoch": 3.45867460908414, "grad_norm": 0.6326962850643331, "kl": 0.30029296875, "learning_rate": 7.419703208800838e-07, "loss": -0.006096666678786278, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2321, "train_speed(iter/s)": 0.028696 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.75, "completions/mean_length": 99.17708587646484, "completions/min_length": 31.0, "epoch": 3.460163812360387, "grad_norm": 1.1895187455291891, "kl": 0.31640625, "learning_rate": 7.41763337307673e-07, "loss": -0.00030278682243078947, "memory(GiB)": 112.53, "reward": 1.9375, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.11058146506547928, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2322, "train_speed(iter/s)": 0.028697 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 266.5, "completions/mean_length": 92.77083587646484, "completions/min_length": 36.5, "epoch": 3.4616530156366343, "grad_norm": 0.46850643200842845, "kl": 0.33642578125, "learning_rate": 7.415562996483192e-07, "loss": -0.013329084031283855, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.822916679084301, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2323, "train_speed(iter/s)": 0.02869 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 241.5, "completions/mean_length": 96.05208587646484, "completions/min_length": 34.5, "epoch": 3.4631422189128815, "grad_norm": 0.9961043296432008, "kl": 0.3037109375, "learning_rate": 7.413492079483404e-07, "loss": 0.006895036436617374, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.06803862750530243, "rewards/CineAccuracyORM/mean": 0.6770833507180214, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2324, "train_speed(iter/s)": 0.028686 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.0, "completions/mean_length": 88.92708587646484, "completions/min_length": 39.0, "epoch": 3.464631422189129, "grad_norm": 1.8260569695223436, "kl": 0.34130859375, "learning_rate": 7.41142062254067e-07, "loss": -0.00428007822483778, "memory(GiB)": 112.53, "reward": 1.6875000298023224, "reward_std": 0.08625819534063339, "rewards/CineAccuracyORM/mean": 0.6875000074505806, "rewards/CineAccuracyORM/std": 0.3085566312074661, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2325, "train_speed(iter/s)": 0.028687 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.75, "completions/mean_length": 89.58333587646484, "completions/min_length": 37.0, "epoch": 3.466120625465376, "grad_norm": 0.0031825784621592394, "kl": 0.326171875, "learning_rate": 7.409348626118411e-07, "loss": 0.000326530629536137, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2326, "train_speed(iter/s)": 0.02868 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 254.0, "completions/mean_length": 98.29166984558105, "completions/min_length": 33.25, "epoch": 3.4676098287416233, "grad_norm": 0.7859202740625365, "kl": 0.3193359375, "learning_rate": 7.407276090680172e-07, "loss": 0.011466994881629944, "memory(GiB)": 112.53, "reward": 1.7187500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500149011612, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2327, "train_speed(iter/s)": 0.02868 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 255.25, "completions/mean_length": 100.71875190734863, "completions/min_length": 51.25, "epoch": 3.4690990320178705, "grad_norm": 0.0035280352476228425, "kl": 0.29296875, "learning_rate": 7.40520301668962e-07, "loss": 0.00029289492522366345, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2328, "train_speed(iter/s)": 0.02868 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.5, "completions/mean_length": 98.61458587646484, "completions/min_length": 37.75, "epoch": 3.4705882352941178, "grad_norm": 0.008472356851023098, "kl": 0.32421875, "learning_rate": 7.403129404610537e-07, "loss": 0.0003238365752622485, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2329, "train_speed(iter/s)": 0.02868 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.0, "completions/mean_length": 89.03125381469727, "completions/min_length": 37.5, "epoch": 3.472077438570365, "grad_norm": 1.5525714516136382, "kl": 0.3212890625, "learning_rate": 7.401055254906828e-07, "loss": 0.006137471180409193, "memory(GiB)": 112.53, "reward": 1.8541666865348816, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.12590253353118896, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2330, "train_speed(iter/s)": 0.028678 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.0, "completions/mean_length": 97.82291984558105, "completions/min_length": 43.0, "epoch": 3.4735666418466122, "grad_norm": 0.9052118848260742, "kl": 0.28515625, "learning_rate": 7.398980568042521e-07, "loss": 0.0008872334146872163, "memory(GiB)": 112.53, "reward": 1.604166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.604166679084301, "rewards/CineAccuracyORM/std": 0.4717390313744545, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2331, "train_speed(iter/s)": 0.028678 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 199.75, "completions/mean_length": 95.98958587646484, "completions/min_length": 39.5, "epoch": 3.475055845122859, "grad_norm": 0.7485913716919804, "kl": 0.318359375, "learning_rate": 7.396905344481761e-07, "loss": 0.0013074600137770176, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2332, "train_speed(iter/s)": 0.028675 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.0, "completions/mean_length": 92.08333778381348, "completions/min_length": 38.0, "epoch": 3.4765450483991067, "grad_norm": 0.0032396229222629606, "kl": 0.31787109375, "learning_rate": 7.394829584688815e-07, "loss": 0.0003178896149620414, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2333, "train_speed(iter/s)": 0.028675 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 191.5, "completions/mean_length": 91.44791793823242, "completions/min_length": 40.75, "epoch": 3.4780342516753535, "grad_norm": 1.2201329742390692, "kl": 0.32470703125, "learning_rate": 7.392753289128069e-07, "loss": -0.0035099158994853497, "memory(GiB)": 112.53, "reward": 1.6875000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000149011612, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2334, "train_speed(iter/s)": 0.028676 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 196.5, "completions/mean_length": 100.59375190734863, "completions/min_length": 39.5, "epoch": 3.4795234549516008, "grad_norm": 0.0034735904879404336, "kl": 0.31005859375, "learning_rate": 7.390676458264027e-07, "loss": 0.000309435126837343, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2335, "train_speed(iter/s)": 0.028679 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 214.25, "completions/mean_length": 101.85416793823242, "completions/min_length": 34.75, "epoch": 3.481012658227848, "grad_norm": 1.4001739551852397, "kl": 0.2841796875, "learning_rate": 7.388599092561314e-07, "loss": -0.010803508572280407, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2336, "train_speed(iter/s)": 0.028676 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 238.5, "completions/mean_length": 102.85416793823242, "completions/min_length": 45.75, "epoch": 3.4825018615040952, "grad_norm": 0.003074413690887998, "kl": 0.3037109375, "learning_rate": 7.386521192484678e-07, "loss": 0.000303948821965605, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2337, "train_speed(iter/s)": 0.028676 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 196.25, "completions/mean_length": 89.22916793823242, "completions/min_length": 40.0, "epoch": 3.4839910647803425, "grad_norm": 1.11439988432247, "kl": 0.32080078125, "learning_rate": 7.384442758498983e-07, "loss": -0.022217731922864914, "memory(GiB)": 112.53, "reward": 1.729166716337204, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.7291666865348816, "rewards/CineAccuracyORM/std": 0.3637066036462784, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2338, "train_speed(iter/s)": 0.028676 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.75, "completions/mean_length": 102.93750190734863, "completions/min_length": 39.5, "epoch": 3.4854802680565897, "grad_norm": 0.0032248984634328857, "kl": 0.294921875, "learning_rate": 7.382363791069213e-07, "loss": 0.00029431702569127083, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2339, "train_speed(iter/s)": 0.028676 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.5, "completions/mean_length": 86.00000190734863, "completions/min_length": 38.75, "epoch": 3.486969471332837, "grad_norm": 0.0039286632236966886, "kl": 0.3271484375, "learning_rate": 7.380284290660471e-07, "loss": 0.00032717929570935667, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2340, "train_speed(iter/s)": 0.028674 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.75, "completions/mean_length": 108.28125381469727, "completions/min_length": 46.25, "epoch": 3.488458674609084, "grad_norm": 0.003253465838699004, "kl": 0.29931640625, "learning_rate": 7.37820425773798e-07, "loss": 0.00029948947485536337, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2341, "train_speed(iter/s)": 0.028671 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.5, "completions/mean_length": 85.69791793823242, "completions/min_length": 32.25, "epoch": 3.4899478778853315, "grad_norm": 0.0038044629669702623, "kl": 0.3173828125, "learning_rate": 7.376123692767082e-07, "loss": 0.00031731181661598384, "memory(GiB)": 112.53, "reward": 1.3333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.3333333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2342, "train_speed(iter/s)": 0.028668 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 202.0, "completions/mean_length": 89.38541984558105, "completions/min_length": 36.25, "epoch": 3.4914370811615787, "grad_norm": 0.003088560333090048, "kl": 0.3046875, "learning_rate": 7.374042596213239e-07, "loss": 0.00030482481815852225, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2343, "train_speed(iter/s)": 0.028666 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.0, "completions/mean_length": 97.71875381469727, "completions/min_length": 42.25, "epoch": 3.492926284437826, "grad_norm": 0.8564792240204854, "kl": 0.3310546875, "learning_rate": 7.37196096854203e-07, "loss": -0.006989004090428352, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2344, "train_speed(iter/s)": 0.028669 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.0, "completions/mean_length": 100.35416793823242, "completions/min_length": 43.25, "epoch": 3.4944154877140727, "grad_norm": 0.003514544143592532, "kl": 0.31787109375, "learning_rate": 7.369878810219154e-07, "loss": 0.000317948404699564, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2345, "train_speed(iter/s)": 0.028666 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.0, "completions/mean_length": 97.84375190734863, "completions/min_length": 40.75, "epoch": 3.4959046909903204, "grad_norm": 0.5984284811601277, "kl": 0.302490234375, "learning_rate": 7.367796121710427e-07, "loss": -0.0063410233706235886, "memory(GiB)": 112.53, "reward": 1.7083333432674408, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7083333432674408, "rewards/CineAccuracyORM/std": 0.09517337381839752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2346, "train_speed(iter/s)": 0.028667 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 256.5, "completions/mean_length": 100.38542175292969, "completions/min_length": 44.25, "epoch": 3.497393894266567, "grad_norm": 0.9460876425912599, "kl": 0.295166015625, "learning_rate": 7.365712903481786e-07, "loss": 0.012144392356276512, "memory(GiB)": 112.53, "reward": 1.760416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166865348816, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2347, "train_speed(iter/s)": 0.02866 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.0, "completions/mean_length": 95.32291793823242, "completions/min_length": 39.25, "epoch": 3.4988830975428145, "grad_norm": 0.9789712648058821, "kl": 0.31396484375, "learning_rate": 7.363629155999288e-07, "loss": -0.008286898024380207, "memory(GiB)": 112.53, "reward": 1.541666716337204, "reward_std": 0.07715167850255966, "rewards/CineAccuracyORM/mean": 0.5520833432674408, "rewards/CineAccuracyORM/std": 0.3735327273607254, "rewards/Format/mean": 0.9895833432674408, "rewards/Format/std": 0.05103103443980217, "step": 2348, "train_speed(iter/s)": 0.028663 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.75, "completions/mean_length": 99.53125381469727, "completions/min_length": 33.75, "epoch": 3.5003723008190617, "grad_norm": 0.0034649001534149196, "kl": 0.29638671875, "learning_rate": 7.361544879729102e-07, "loss": 0.00029607751639559865, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2349, "train_speed(iter/s)": 0.028667 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 225.25, "completions/mean_length": 101.52083778381348, "completions/min_length": 33.75, "epoch": 3.501861504095309, "grad_norm": 0.6591110190327635, "kl": 0.2880859375, "learning_rate": 7.359460075137522e-07, "loss": 0.008051859214901924, "memory(GiB)": 112.53, "reward": 1.729166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.729166679084301, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2350, "train_speed(iter/s)": 0.028669 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.75, "completions/mean_length": 97.81250190734863, "completions/min_length": 32.75, "epoch": 3.503350707371556, "grad_norm": 1.0918976530362747, "kl": 0.30419921875, "learning_rate": 7.357374742690955e-07, "loss": -0.005928065627813339, "memory(GiB)": 112.53, "reward": 1.8125000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000074505806, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2351, "train_speed(iter/s)": 0.028666 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 199.5, "completions/mean_length": 93.13541793823242, "completions/min_length": 39.75, "epoch": 3.5048399106478034, "grad_norm": 0.809583516170935, "kl": 0.298828125, "learning_rate": 7.355288882855932e-07, "loss": 0.002046041889116168, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833507180214, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2352, "train_speed(iter/s)": 0.028664 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 195.25, "completions/mean_length": 94.29167175292969, "completions/min_length": 44.25, "epoch": 3.5063291139240507, "grad_norm": 0.9168482071948011, "kl": 0.29248046875, "learning_rate": 7.353202496099096e-07, "loss": -0.0016924984520301223, "memory(GiB)": 112.53, "reward": 1.8645833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8645833432674408, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2353, "train_speed(iter/s)": 0.028664 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 232.75, "completions/mean_length": 105.36458587646484, "completions/min_length": 43.0, "epoch": 3.507818317200298, "grad_norm": 0.8261259783927862, "kl": 0.3134765625, "learning_rate": 7.351115582887211e-07, "loss": -0.016156265512108803, "memory(GiB)": 112.53, "reward": 1.510416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.510416679084301, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2354, "train_speed(iter/s)": 0.028664 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 204.5, "completions/mean_length": 93.10416793823242, "completions/min_length": 34.25, "epoch": 3.509307520476545, "grad_norm": 0.0033999673346110327, "kl": 0.32421875, "learning_rate": 7.349028143687158e-07, "loss": 0.00032380037009716034, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2355, "train_speed(iter/s)": 0.028664 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.25, "completions/mean_length": 104.84375190734863, "completions/min_length": 43.0, "epoch": 3.5107967237527924, "grad_norm": 0.007976094488521232, "kl": 0.304443359375, "learning_rate": 7.346940178965935e-07, "loss": 0.0003052290703635663, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2356, "train_speed(iter/s)": 0.028664 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 246.0, "completions/mean_length": 99.81250190734863, "completions/min_length": 41.0, "epoch": 3.5122859270290396, "grad_norm": 0.003138791385478979, "kl": 0.3076171875, "learning_rate": 7.344851689190661e-07, "loss": 0.000307195819914341, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2357, "train_speed(iter/s)": 0.028658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 204.75, "completions/mean_length": 90.29166793823242, "completions/min_length": 36.25, "epoch": 3.5137751303052864, "grad_norm": 1.1063836586993874, "kl": 0.32666015625, "learning_rate": 7.342762674828568e-07, "loss": -0.00543223787099123, "memory(GiB)": 112.53, "reward": 1.5833333432674408, "reward_std": 0.07715167850255966, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.23648399859666824, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2358, "train_speed(iter/s)": 0.028658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 286.0, "completions/mean_length": 104.22916984558105, "completions/min_length": 40.75, "epoch": 3.515264333581534, "grad_norm": 0.6373229226257052, "kl": 0.287109375, "learning_rate": 7.340673136347008e-07, "loss": -0.0030101477168500423, "memory(GiB)": 112.53, "reward": 1.8125000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000074505806, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2359, "train_speed(iter/s)": 0.028657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 229.25, "completions/mean_length": 99.79166984558105, "completions/min_length": 42.5, "epoch": 3.516753536857781, "grad_norm": 0.657453536961256, "kl": 0.3115234375, "learning_rate": 7.338583074213449e-07, "loss": -0.016195781528949738, "memory(GiB)": 112.53, "reward": 1.572916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.572916679084301, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2360, "train_speed(iter/s)": 0.028657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.25, "completions/mean_length": 96.37500190734863, "completions/min_length": 44.25, "epoch": 3.518242740134028, "grad_norm": 0.0033302070935349332, "kl": 0.3037109375, "learning_rate": 7.336492488895477e-07, "loss": 0.0003037292044609785, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2361, "train_speed(iter/s)": 0.028655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 202.5, "completions/mean_length": 99.03125381469727, "completions/min_length": 44.5, "epoch": 3.5197319434102754, "grad_norm": 0.003852768839055667, "kl": 0.3056640625, "learning_rate": 7.334401380860794e-07, "loss": 0.0003052786341868341, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2362, "train_speed(iter/s)": 0.028652 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 194.75, "completions/mean_length": 97.43750190734863, "completions/min_length": 42.0, "epoch": 3.5212211466865226, "grad_norm": 0.9617776134196852, "kl": 0.30712890625, "learning_rate": 7.332309750577222e-07, "loss": 0.02030251733958721, "memory(GiB)": 112.53, "reward": 1.6145834028720856, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.614583358168602, "rewards/CineAccuracyORM/std": 0.48275065422058105, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2363, "train_speed(iter/s)": 0.028656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.0, "completions/mean_length": 97.09375190734863, "completions/min_length": 45.0, "epoch": 3.52271034996277, "grad_norm": 0.7053322358236701, "kl": 0.30126953125, "learning_rate": 7.330217598512694e-07, "loss": -0.003406493691727519, "memory(GiB)": 112.53, "reward": 1.7187500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500074505806, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2364, "train_speed(iter/s)": 0.028653 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.5, "completions/mean_length": 101.42708587646484, "completions/min_length": 36.5, "epoch": 3.524199553239017, "grad_norm": 0.003705899461482432, "kl": 0.28759765625, "learning_rate": 7.328124925135265e-07, "loss": 0.0002876373764593154, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2365, "train_speed(iter/s)": 0.028656 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 247.5, "completions/mean_length": 99.58333587646484, "completions/min_length": 41.75, "epoch": 3.5256887565152644, "grad_norm": 1.3937109116889974, "kl": 0.3251953125, "learning_rate": 7.326031730913106e-07, "loss": 0.013774613849818707, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.08625819534063339, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.35134297609329224, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2366, "train_speed(iter/s)": 0.028652 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 237.5, "completions/mean_length": 97.28125190734863, "completions/min_length": 37.25, "epoch": 3.5271779597915116, "grad_norm": 0.004119526427811569, "kl": 0.31005859375, "learning_rate": 7.323938016314502e-07, "loss": 0.00030995061388239264, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2367, "train_speed(iter/s)": 0.028649 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.5, "completions/mean_length": 100.83333587646484, "completions/min_length": 40.25, "epoch": 3.528667163067759, "grad_norm": 0.7813520842407512, "kl": 0.32373046875, "learning_rate": 7.321843781807855e-07, "loss": -0.018381282687187195, "memory(GiB)": 112.53, "reward": 1.8125000596046448, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2368, "train_speed(iter/s)": 0.028649 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 329.75, "completions/mean_length": 111.97916793823242, "completions/min_length": 49.25, "epoch": 3.530156366344006, "grad_norm": 0.7775411859933399, "kl": 0.2861328125, "learning_rate": 7.319749027861686e-07, "loss": -0.0036664195358753204, "memory(GiB)": 112.53, "reward": 1.5937500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500223517418, "rewards/CineAccuracyORM/std": 0.47723397612571716, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2369, "train_speed(iter/s)": 0.028648 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 240.0, "completions/mean_length": 106.15625381469727, "completions/min_length": 38.5, "epoch": 3.5316455696202533, "grad_norm": 0.9711773126496358, "kl": 0.29345703125, "learning_rate": 7.317653754944628e-07, "loss": 0.013020278885960579, "memory(GiB)": 112.53, "reward": 1.7083333730697632, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7083333432674408, "rewards/CineAccuracyORM/std": 0.36846019327640533, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2370, "train_speed(iter/s)": 0.028644 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 201.0, "completions/mean_length": 99.33333587646484, "completions/min_length": 36.0, "epoch": 3.5331347728965, "grad_norm": 0.0034594549361251204, "kl": 0.30615234375, "learning_rate": 7.315557963525435e-07, "loss": 0.00030646449886262417, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2371, "train_speed(iter/s)": 0.028648 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 223.5, "completions/mean_length": 96.82292175292969, "completions/min_length": 39.75, "epoch": 3.534623976172748, "grad_norm": 0.004001803547210539, "kl": 0.30810546875, "learning_rate": 7.313461654072973e-07, "loss": 0.0003078088629990816, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2372, "train_speed(iter/s)": 0.028645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 214.0, "completions/mean_length": 97.13541984558105, "completions/min_length": 32.5, "epoch": 3.5361131794489946, "grad_norm": 0.7714524784405176, "kl": 0.30029296875, "learning_rate": 7.311364827056227e-07, "loss": -0.009519734419882298, "memory(GiB)": 112.53, "reward": 1.7083333730697632, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7083333432674408, "rewards/CineAccuracyORM/std": 0.36846019327640533, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2373, "train_speed(iter/s)": 0.028645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.0, "completions/mean_length": 106.57291984558105, "completions/min_length": 47.5, "epoch": 3.537602382725242, "grad_norm": 0.003663324152575451, "kl": 0.29296875, "learning_rate": 7.309267482944293e-07, "loss": 0.0002924035070464015, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2374, "train_speed(iter/s)": 0.028645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 244.0, "completions/mean_length": 96.69791793823242, "completions/min_length": 33.25, "epoch": 3.539091586001489, "grad_norm": 0.7682877934519038, "kl": 0.4013671875, "learning_rate": 7.307169622206386e-07, "loss": 0.03190344572067261, "memory(GiB)": 112.53, "reward": 1.7812500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7812500074505806, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2375, "train_speed(iter/s)": 0.028642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 261.25, "completions/mean_length": 99.09375, "completions/min_length": 39.25, "epoch": 3.5405807892777363, "grad_norm": 0.8119327461640496, "kl": 0.29931640625, "learning_rate": 7.30507124531184e-07, "loss": -0.009723365306854248, "memory(GiB)": 112.53, "reward": 1.8750000596046448, "reward_std": 0.07715167850255966, "rewards/CineAccuracyORM/mean": 0.8750000149011612, "rewards/CineAccuracyORM/std": 0.26155078411102295, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2376, "train_speed(iter/s)": 0.02864 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.5, "completions/mean_length": 94.51041984558105, "completions/min_length": 40.75, "epoch": 3.5420699925539836, "grad_norm": 1.5294503483607595, "kl": 0.3193359375, "learning_rate": 7.302972352730099e-07, "loss": 0.013800087384879589, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.31460215896368027, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2377, "train_speed(iter/s)": 0.028638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 275.0, "completions/mean_length": 105.37500190734863, "completions/min_length": 44.5, "epoch": 3.543559195830231, "grad_norm": 0.41996560955935125, "kl": 0.29638671875, "learning_rate": 7.300872944930723e-07, "loss": -0.014068102464079857, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2378, "train_speed(iter/s)": 0.02864 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 196.5, "completions/mean_length": 92.32291984558105, "completions/min_length": 32.5, "epoch": 3.545048399106478, "grad_norm": 0.006498667601599527, "kl": 0.33251953125, "learning_rate": 7.29877302238339e-07, "loss": 0.00033200252801179886, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2379, "train_speed(iter/s)": 0.028638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 225.5, "completions/mean_length": 99.25000381469727, "completions/min_length": 41.0, "epoch": 3.5465376023827253, "grad_norm": 0.8849847859046628, "kl": 0.33154296875, "learning_rate": 7.296672585557889e-07, "loss": -0.009164759889245033, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.06803862750530243, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2380, "train_speed(iter/s)": 0.028642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 265.0, "completions/mean_length": 105.75000190734863, "completions/min_length": 38.5, "epoch": 3.5480268056589725, "grad_norm": 1.1343004871893192, "kl": 0.2822265625, "learning_rate": 7.29457163492413e-07, "loss": -0.005422408692538738, "memory(GiB)": 112.53, "reward": 1.7916666865348816, "reward_std": 0.07715167850255966, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.23648399859666824, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2381, "train_speed(iter/s)": 0.028638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 236.25, "completions/mean_length": 108.60416793823242, "completions/min_length": 44.5, "epoch": 3.54951600893522, "grad_norm": 0.0044528194808444626, "kl": 0.29296875, "learning_rate": 7.292470170952133e-07, "loss": 0.0002923151187133044, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2382, "train_speed(iter/s)": 0.028638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.5, "completions/mean_length": 104.12500381469727, "completions/min_length": 36.5, "epoch": 3.551005212211467, "grad_norm": 0.003831440795098831, "kl": 0.30078125, "learning_rate": 7.290368194112035e-07, "loss": 0.0003013118403032422, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2383, "train_speed(iter/s)": 0.028638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.0, "completions/mean_length": 101.27083587646484, "completions/min_length": 42.5, "epoch": 3.552494415487714, "grad_norm": 0.004099017592097542, "kl": 0.29296875, "learning_rate": 7.288265704874089e-07, "loss": 0.0002926935558207333, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2384, "train_speed(iter/s)": 0.028638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 270.75, "completions/mean_length": 112.46875381469727, "completions/min_length": 43.5, "epoch": 3.5539836187639615, "grad_norm": 0.006689439568327747, "kl": 0.29052734375, "learning_rate": 7.286162703708654e-07, "loss": 0.00029052566969767213, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2385, "train_speed(iter/s)": 0.028635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 271.25, "completions/mean_length": 115.57291984558105, "completions/min_length": 52.5, "epoch": 3.5554728220402083, "grad_norm": 1.3253773569502016, "kl": 0.27392578125, "learning_rate": 7.284059191086218e-07, "loss": 0.010770517401397228, "memory(GiB)": 112.53, "reward": 1.7812500298023224, "reward_std": 0.12028077617287636, "rewards/CineAccuracyORM/mean": 0.7812500149011612, "rewards/CineAccuracyORM/std": 0.2419789433479309, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2386, "train_speed(iter/s)": 0.028632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 204.0, "completions/mean_length": 106.10416793823242, "completions/min_length": 48.75, "epoch": 3.5569620253164556, "grad_norm": 0.003752136382541218, "kl": 0.29736328125, "learning_rate": 7.281955167477372e-07, "loss": 0.0002975653624162078, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2387, "train_speed(iter/s)": 0.028634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 196.5, "completions/mean_length": 101.34375190734863, "completions/min_length": 46.75, "epoch": 3.558451228592703, "grad_norm": 0.0035108563880438207, "kl": 0.296142578125, "learning_rate": 7.279850633352827e-07, "loss": 0.00029595999512821436, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2388, "train_speed(iter/s)": 0.028634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 283.25, "completions/mean_length": 106.0625, "completions/min_length": 43.75, "epoch": 3.55994043186895, "grad_norm": 1.3162739088115223, "kl": 0.28173828125, "learning_rate": 7.277745589183404e-07, "loss": -0.0052822791039943695, "memory(GiB)": 112.53, "reward": 1.6562500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500223517418, "rewards/CineAccuracyORM/std": 0.4121886007487774, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2389, "train_speed(iter/s)": 0.028633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 235.75, "completions/mean_length": 106.60416793823242, "completions/min_length": 50.0, "epoch": 3.5614296351451973, "grad_norm": 0.006069295485967892, "kl": 0.31396484375, "learning_rate": 7.275640035440044e-07, "loss": 0.0003134446160402149, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2390, "train_speed(iter/s)": 0.028637 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 294.75, "completions/mean_length": 109.27083587646484, "completions/min_length": 43.0, "epoch": 3.5629188384214445, "grad_norm": 0.5787006229997556, "kl": 0.302734375, "learning_rate": 7.273533972593795e-07, "loss": -0.004903237335383892, "memory(GiB)": 112.53, "reward": 1.822916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2391, "train_speed(iter/s)": 0.028629 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 237.0, "completions/mean_length": 105.14583587646484, "completions/min_length": 41.75, "epoch": 3.5644080416976918, "grad_norm": 0.0058320281977363095, "kl": 0.291015625, "learning_rate": 7.271427401115824e-07, "loss": 0.00029085372807458043, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2392, "train_speed(iter/s)": 0.028629 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 257.75, "completions/mean_length": 110.41666793823242, "completions/min_length": 51.75, "epoch": 3.565897244973939, "grad_norm": 0.7243054383069112, "kl": 0.30078125, "learning_rate": 7.269320321477408e-07, "loss": -0.009205964393913746, "memory(GiB)": 112.53, "reward": 1.4791666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.4791666716337204, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2393, "train_speed(iter/s)": 0.028632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 285.5, "completions/mean_length": 115.01041793823242, "completions/min_length": 50.0, "epoch": 3.5673864482501862, "grad_norm": 0.003495374864143178, "kl": 0.27978515625, "learning_rate": 7.267212734149942e-07, "loss": 0.00027965474873781204, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2394, "train_speed(iter/s)": 0.028626 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 237.75, "completions/mean_length": 104.06250190734863, "completions/min_length": 38.0, "epoch": 3.5688756515264335, "grad_norm": 0.5299600869618293, "kl": 0.32177734375, "learning_rate": 7.265104639604933e-07, "loss": 0.012450157664716244, "memory(GiB)": 112.53, "reward": 1.760416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166865348816, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2395, "train_speed(iter/s)": 0.028626 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 243.5, "completions/mean_length": 112.95833778381348, "completions/min_length": 54.0, "epoch": 3.5703648548026807, "grad_norm": 0.0046000041932918075, "kl": 0.27392578125, "learning_rate": 7.262996038314e-07, "loss": 0.00027408209280110896, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2396, "train_speed(iter/s)": 0.028622 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.5, "completions/mean_length": 106.82291984558105, "completions/min_length": 54.0, "epoch": 3.5718540580789275, "grad_norm": 1.294408984662712, "kl": 0.29736328125, "learning_rate": 7.260886930748875e-07, "loss": 0.008172566071152687, "memory(GiB)": 112.53, "reward": 1.6250000298023224, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.6250000149011612, "rewards/CineAccuracyORM/std": 0.2155592292547226, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2397, "train_speed(iter/s)": 0.028622 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 252.0, "completions/mean_length": 112.38541793823242, "completions/min_length": 43.25, "epoch": 3.573343261355175, "grad_norm": 0.7565813555551872, "kl": 0.28369140625, "learning_rate": 7.258777317381406e-07, "loss": -0.0013160689268261194, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2398, "train_speed(iter/s)": 0.028625 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 274.0, "completions/mean_length": 103.73958587646484, "completions/min_length": 40.25, "epoch": 3.574832464631422, "grad_norm": 1.0572099288920602, "kl": 0.28955078125, "learning_rate": 7.256667198683551e-07, "loss": 0.0069695813581347466, "memory(GiB)": 112.53, "reward": 1.4270833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.4270833507180214, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2399, "train_speed(iter/s)": 0.028624 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 364.5, "completions/mean_length": 120.22917175292969, "completions/min_length": 50.0, "epoch": 3.5763216679076693, "grad_norm": 0.005480114035764562, "kl": 0.28564453125, "learning_rate": 7.254556575127384e-07, "loss": 0.00028540173661895096, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2400, "train_speed(iter/s)": 0.028619 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 281.0, "completions/mean_length": 117.93750381469727, "completions/min_length": 48.0, "epoch": 3.5778108711839165, "grad_norm": 0.7276293430242281, "kl": 0.2841796875, "learning_rate": 7.25244544718509e-07, "loss": 0.0002035582292592153, "memory(GiB)": 112.53, "reward": 1.729166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7291666865348816, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2401, "train_speed(iter/s)": 0.028611 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 250.0, "completions/mean_length": 111.34375381469727, "completions/min_length": 46.0, "epoch": 3.5793000744601637, "grad_norm": 0.5788692827286717, "kl": 0.30126953125, "learning_rate": 7.250333815328969e-07, "loss": -7.132507744245231e-05, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 0.9895833432674408, "rewards/Format/std": 0.05103103443980217, "step": 2402, "train_speed(iter/s)": 0.028614 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.5, "completions/mean_length": 108.55208587646484, "completions/min_length": 32.0, "epoch": 3.580789277736411, "grad_norm": 1.1569471343383133, "kl": 0.3125, "learning_rate": 7.248221680031431e-07, "loss": -0.013907304033637047, "memory(GiB)": 112.53, "reward": 1.7083333730697632, "reward_std": 0.0725918859243393, "rewards/CineAccuracyORM/mean": 0.7083333432674408, "rewards/CineAccuracyORM/std": 0.36846019327640533, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2403, "train_speed(iter/s)": 0.028617 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 262.5, "completions/mean_length": 113.38541984558105, "completions/min_length": 40.25, "epoch": 3.5822784810126582, "grad_norm": 1.102188554490807, "kl": 0.28857421875, "learning_rate": 7.246109041764998e-07, "loss": -0.006377062760293484, "memory(GiB)": 112.53, "reward": 1.7291667461395264, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7291666865348816, "rewards/CineAccuracyORM/std": 0.43174003064632416, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2404, "train_speed(iter/s)": 0.028617 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 317.5, "completions/mean_length": 119.90625190734863, "completions/min_length": 43.25, "epoch": 3.5837676842889055, "grad_norm": 1.1450643888549554, "kl": 0.29248046875, "learning_rate": 7.243995901002311e-07, "loss": -0.0359019935131073, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.11116771958768368, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.47428806871175766, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2405, "train_speed(iter/s)": 0.028613 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 307.75, "completions/mean_length": 124.73958778381348, "completions/min_length": 40.0, "epoch": 3.5852568875651527, "grad_norm": 1.427267402380076, "kl": 0.252685546875, "learning_rate": 7.241882258216114e-07, "loss": -0.003972616046667099, "memory(GiB)": 112.53, "reward": 1.760416716337204, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.760416679084301, "rewards/CineAccuracyORM/std": 0.3182126581668854, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2406, "train_speed(iter/s)": 0.028611 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 273.75, "completions/mean_length": 113.53125190734863, "completions/min_length": 45.5, "epoch": 3.5867460908414, "grad_norm": 0.004889267441924653, "kl": 0.2900390625, "learning_rate": 7.239768113879272e-07, "loss": 0.00028978107729926705, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2407, "train_speed(iter/s)": 0.028604 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 276.0, "completions/mean_length": 114.94791984558105, "completions/min_length": 52.5, "epoch": 3.588235294117647, "grad_norm": 2.0873738194013365, "kl": 0.296142578125, "learning_rate": 7.237653468464755e-07, "loss": 0.03986688703298569, "memory(GiB)": 112.53, "reward": 1.6770834028720856, "reward_std": 0.102054663002491, "rewards/CineAccuracyORM/mean": 0.6875000223517418, "rewards/CineAccuracyORM/std": 0.4190470837056637, "rewards/Format/mean": 0.9895833432674408, "rewards/Format/std": 0.05103103443980217, "step": 2408, "train_speed(iter/s)": 0.028595 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 281.5, "completions/mean_length": 114.50000381469727, "completions/min_length": 50.75, "epoch": 3.5897244973938944, "grad_norm": 0.6710557033947889, "kl": 0.28515625, "learning_rate": 7.235538322445651e-07, "loss": 0.0025015207938849926, "memory(GiB)": 112.53, "reward": 1.8645833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8645833432674408, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2409, "train_speed(iter/s)": 0.028588 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 301.25, "completions/mean_length": 128.81250381469727, "completions/min_length": 54.5, "epoch": 3.5912137006701412, "grad_norm": 1.7037713269795853, "kl": 0.261474609375, "learning_rate": 7.233422676295156e-07, "loss": -0.0027288608253002167, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2410, "train_speed(iter/s)": 0.02858 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 251.25, "completions/mean_length": 108.78125381469727, "completions/min_length": 38.25, "epoch": 3.592702903946389, "grad_norm": 0.5746723326298981, "kl": 0.29345703125, "learning_rate": 7.231306530486578e-07, "loss": -0.0011388924904167652, "memory(GiB)": 112.53, "reward": 1.479166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.4791666865348816, "rewards/CineAccuracyORM/std": 0.4870600998401642, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2411, "train_speed(iter/s)": 0.028578 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 287.75, "completions/mean_length": 119.29166984558105, "completions/min_length": 49.75, "epoch": 3.5941921072226357, "grad_norm": 0.6470882464496958, "kl": 0.5859375, "learning_rate": 7.229189885493341e-07, "loss": 0.01723434403538704, "memory(GiB)": 112.53, "reward": 1.6562500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500149011612, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2412, "train_speed(iter/s)": 0.028572 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.25, "completions/mean_length": 102.15625381469727, "completions/min_length": 46.0, "epoch": 3.595681310498883, "grad_norm": 0.5514844189332755, "kl": 0.29931640625, "learning_rate": 7.227072741788975e-07, "loss": 0.014748968183994293, "memory(GiB)": 112.53, "reward": 1.3437500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.3437500074505806, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2413, "train_speed(iter/s)": 0.028572 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 247.5, "completions/mean_length": 110.63541984558105, "completions/min_length": 52.5, "epoch": 3.59717051377513, "grad_norm": 0.8033033034118763, "kl": 0.269775390625, "learning_rate": 7.224955099847127e-07, "loss": -0.006555907428264618, "memory(GiB)": 112.53, "reward": 1.7083333730697632, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7083333432674408, "rewards/CineAccuracyORM/std": 0.36846019327640533, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2414, "train_speed(iter/s)": 0.028575 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 286.5, "completions/mean_length": 107.58333587646484, "completions/min_length": 47.75, "epoch": 3.5986597170513774, "grad_norm": 1.4817550302197953, "kl": 0.29296875, "learning_rate": 7.222836960141552e-07, "loss": 0.002576229628175497, "memory(GiB)": 112.53, "reward": 1.9375000298023224, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.9375000149011612, "rewards/CineAccuracyORM/std": 0.15474381670355797, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2415, "train_speed(iter/s)": 0.028578 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 206.75, "completions/mean_length": 113.35416984558105, "completions/min_length": 57.5, "epoch": 3.6001489203276247, "grad_norm": 0.7325337239776589, "kl": 0.27783203125, "learning_rate": 7.220718323146117e-07, "loss": 0.0003495741984806955, "memory(GiB)": 112.53, "reward": 1.7604166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.760416679084301, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2416, "train_speed(iter/s)": 0.028578 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 261.75, "completions/mean_length": 115.35416984558105, "completions/min_length": 48.5, "epoch": 3.601638123603872, "grad_norm": 1.2544744048060894, "kl": 0.279541015625, "learning_rate": 7.218599189334799e-07, "loss": -0.0065760831348598, "memory(GiB)": 112.53, "reward": 1.7708333432674408, "reward_std": 0.0725918859243393, "rewards/CineAccuracyORM/mean": 0.7708333358168602, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2417, "train_speed(iter/s)": 0.028578 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 248.5, "completions/mean_length": 105.97916984558105, "completions/min_length": 45.25, "epoch": 3.603127326880119, "grad_norm": 0.0057139821867706365, "kl": 0.29541015625, "learning_rate": 7.216479559181688e-07, "loss": 0.000295359204756096, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2418, "train_speed(iter/s)": 0.028581 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 310.75, "completions/mean_length": 114.86458778381348, "completions/min_length": 45.75, "epoch": 3.6046165301563664, "grad_norm": 0.7297513356441989, "kl": 0.29541015625, "learning_rate": 7.214359433160984e-07, "loss": 0.015238671563565731, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.2950507290661335, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2419, "train_speed(iter/s)": 0.028577 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 393.0, "completions/mean_length": 125.15625, "completions/min_length": 46.25, "epoch": 3.6061057334326136, "grad_norm": 0.4502480823855113, "kl": 0.28759765625, "learning_rate": 7.212238811747002e-07, "loss": -0.005992944817990065, "memory(GiB)": 112.53, "reward": 1.6562500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500074505806, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2420, "train_speed(iter/s)": 0.028575 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 252.5, "completions/mean_length": 109.95833587646484, "completions/min_length": 48.5, "epoch": 3.607594936708861, "grad_norm": 1.1763661593538646, "kl": 0.3056640625, "learning_rate": 7.210117695414162e-07, "loss": 0.0026272721588611603, "memory(GiB)": 112.53, "reward": 1.6979167461395264, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.6979166865348816, "rewards/CineAccuracyORM/std": 0.43859851360321045, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2421, "train_speed(iter/s)": 0.028574 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.0, "completions/mean_length": 108.21875190734863, "completions/min_length": 48.5, "epoch": 3.609084139985108, "grad_norm": 1.4844023813823488, "kl": 0.32763671875, "learning_rate": 7.207996084636998e-07, "loss": 0.012385636568069458, "memory(GiB)": 112.53, "reward": 1.791666716337204, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.7916666865348816, "rewards/CineAccuracyORM/std": 0.2986612282693386, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2422, "train_speed(iter/s)": 0.028572 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 231.0, "completions/mean_length": 101.38542175292969, "completions/min_length": 45.25, "epoch": 3.610573343261355, "grad_norm": 0.008861563727400554, "kl": 0.28759765625, "learning_rate": 7.20587397989015e-07, "loss": 0.0002878521045204252, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2423, "train_speed(iter/s)": 0.028569 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 353.25, "completions/mean_length": 117.77083587646484, "completions/min_length": 49.25, "epoch": 3.6120625465376026, "grad_norm": 0.004490378258480282, "kl": 0.27490234375, "learning_rate": 7.203751381648376e-07, "loss": 0.00027516254340298474, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2424, "train_speed(iter/s)": 0.028564 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 261.5, "completions/mean_length": 123.95833587646484, "completions/min_length": 58.25, "epoch": 3.6135517498138494, "grad_norm": 0.01323271327097093, "kl": 0.2744140625, "learning_rate": 7.20162829038654e-07, "loss": 0.00027472892543300986, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2425, "train_speed(iter/s)": 0.028563 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 315.25, "completions/mean_length": 114.73958396911621, "completions/min_length": 62.5, "epoch": 3.6150409530900967, "grad_norm": 0.005174627251621564, "kl": 0.29541015625, "learning_rate": 7.199504706579616e-07, "loss": 0.0002946650783997029, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2426, "train_speed(iter/s)": 0.028558 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 285.0, "completions/mean_length": 105.55208396911621, "completions/min_length": 33.5, "epoch": 3.616530156366344, "grad_norm": 0.006372908926721304, "kl": 0.28955078125, "learning_rate": 7.197380630702691e-07, "loss": 0.00028910074615851045, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2427, "train_speed(iter/s)": 0.028558 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 304.75, "completions/mean_length": 125.3437557220459, "completions/min_length": 49.25, "epoch": 3.618019359642591, "grad_norm": 0.982312315885111, "kl": 0.291015625, "learning_rate": 7.195256063230956e-07, "loss": 0.005446615628898144, "memory(GiB)": 112.53, "reward": 1.96875, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.96875, "rewards/CineAccuracyORM/std": 0.08445799350738525, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2428, "train_speed(iter/s)": 0.028554 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 233.25, "completions/mean_length": 104.35417175292969, "completions/min_length": 44.75, "epoch": 3.6195085629188384, "grad_norm": 1.30002659499958, "kl": 0.3095703125, "learning_rate": 7.193131004639721e-07, "loss": -0.00256954412907362, "memory(GiB)": 112.53, "reward": 1.7812500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7812500149011612, "rewards/CineAccuracyORM/std": 0.34448449313640594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2429, "train_speed(iter/s)": 0.028557 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 240.5, "completions/mean_length": 108.68750381469727, "completions/min_length": 46.25, "epoch": 3.6209977661950856, "grad_norm": 0.029224150200470287, "kl": 0.3095703125, "learning_rate": 7.191005455404398e-07, "loss": 0.00030916568357497454, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2430, "train_speed(iter/s)": 0.028556 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 311.25, "completions/mean_length": 113.05208587646484, "completions/min_length": 44.25, "epoch": 3.622486969471333, "grad_norm": 0.006455085437584702, "kl": 0.2958984375, "learning_rate": 7.188879416000513e-07, "loss": 0.0002962384023703635, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2431, "train_speed(iter/s)": 0.028558 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 347.0, "completions/mean_length": 118.05208778381348, "completions/min_length": 51.25, "epoch": 3.62397617274758, "grad_norm": 0.588245705906673, "kl": 0.270263671875, "learning_rate": 7.186752886903701e-07, "loss": 7.398503657896072e-05, "memory(GiB)": 112.53, "reward": 1.604166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.604166679084301, "rewards/CineAccuracyORM/std": 0.4717390313744545, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2432, "train_speed(iter/s)": 0.02856 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 241.25, "completions/mean_length": 118.80208587646484, "completions/min_length": 51.25, "epoch": 3.6254653760238273, "grad_norm": 0.6135136759134141, "kl": 0.283203125, "learning_rate": 7.184625868589703e-07, "loss": 0.00040763625293038785, "memory(GiB)": 112.53, "reward": 1.6770833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833358168602, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2433, "train_speed(iter/s)": 0.02856 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 295.0, "completions/mean_length": 118.55208396911621, "completions/min_length": 47.5, "epoch": 3.6269545793000746, "grad_norm": 1.1997638339782486, "kl": 0.27197265625, "learning_rate": 7.182498361534377e-07, "loss": 0.01537175290286541, "memory(GiB)": 112.53, "reward": 1.7812500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7812500149011612, "rewards/CineAccuracyORM/std": 0.34448449313640594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2434, "train_speed(iter/s)": 0.028562 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 277.5, "completions/mean_length": 117.64583778381348, "completions/min_length": 58.25, "epoch": 3.628443782576322, "grad_norm": 0.018883711452174982, "kl": 0.29833984375, "learning_rate": 7.180370366213683e-07, "loss": 0.00029810500564053655, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2435, "train_speed(iter/s)": 0.028555 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 243.75, "completions/mean_length": 121.43750381469727, "completions/min_length": 51.0, "epoch": 3.6299329858525686, "grad_norm": 0.006833717329047949, "kl": 0.29052734375, "learning_rate": 7.178241883103693e-07, "loss": 0.0002899515093304217, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2436, "train_speed(iter/s)": 0.028555 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 364.0, "completions/mean_length": 114.52083587646484, "completions/min_length": 45.25, "epoch": 3.6314221891288163, "grad_norm": 1.3560509949085509, "kl": 0.30078125, "learning_rate": 7.176112912680587e-07, "loss": 0.018058154731988907, "memory(GiB)": 112.53, "reward": 1.729166716337204, "reward_std": 0.11116772517561913, "rewards/CineAccuracyORM/mean": 0.7291666865348816, "rewards/CineAccuracyORM/std": 0.34146176278591156, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2437, "train_speed(iter/s)": 0.028557 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 295.25, "completions/mean_length": 124.44791984558105, "completions/min_length": 57.25, "epoch": 3.632911392405063, "grad_norm": 0.006495737129759789, "kl": 0.27587890625, "learning_rate": 7.173983455420658e-07, "loss": 0.0002758473565336317, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2438, "train_speed(iter/s)": 0.028552 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 385.5, "completions/mean_length": 132.45833587646484, "completions/min_length": 51.0, "epoch": 3.6344005956813104, "grad_norm": 1.4912434204002125, "kl": 0.2919921875, "learning_rate": 7.171853511800304e-07, "loss": -0.0112802404910326, "memory(GiB)": 112.53, "reward": 1.6354166865348816, "reward_std": 0.1322161816060543, "rewards/CineAccuracyORM/mean": 0.635416679084301, "rewards/CineAccuracyORM/std": 0.42770570516586304, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2439, "train_speed(iter/s)": 0.028548 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 263.25, "completions/mean_length": 110.80208778381348, "completions/min_length": 44.75, "epoch": 3.6358897989575576, "grad_norm": 0.0052425665322162005, "kl": 0.29150390625, "learning_rate": 7.169723082296033e-07, "loss": 0.000291624543024227, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2440, "train_speed(iter/s)": 0.028547 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 299.0, "completions/mean_length": 123.86458778381348, "completions/min_length": 44.5, "epoch": 3.637379002233805, "grad_norm": 0.005400819031667477, "kl": 0.29296875, "learning_rate": 7.167592167384461e-07, "loss": 0.00029260836890898645, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2441, "train_speed(iter/s)": 0.028542 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 251.0, "completions/mean_length": 116.95833587646484, "completions/min_length": 50.25, "epoch": 3.638868205510052, "grad_norm": 0.0052368031375531416, "kl": 0.2841796875, "learning_rate": 7.165460767542313e-07, "loss": 0.00028477743035182357, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2442, "train_speed(iter/s)": 0.028541 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 291.0, "completions/mean_length": 129.07291984558105, "completions/min_length": 39.25, "epoch": 3.6403574087862993, "grad_norm": 0.00924048229402849, "kl": 0.29150390625, "learning_rate": 7.163328883246424e-07, "loss": 0.00029123786953277886, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2443, "train_speed(iter/s)": 0.028541 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 279.5, "completions/mean_length": 119.62500381469727, "completions/min_length": 46.5, "epoch": 3.6418466120625466, "grad_norm": 0.4440915594533719, "kl": 0.285888671875, "learning_rate": 7.161196514973734e-07, "loss": 0.007230391725897789, "memory(GiB)": 112.53, "reward": 1.8541666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2444, "train_speed(iter/s)": 0.02854 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 289.25, "completions/mean_length": 121.27083778381348, "completions/min_length": 49.25, "epoch": 3.643335815338794, "grad_norm": 0.00760102156516951, "kl": 0.27685546875, "learning_rate": 7.159063663201297e-07, "loss": 0.0002767977421171963, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2445, "train_speed(iter/s)": 0.028539 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 395.5, "completions/mean_length": 136.40625381469727, "completions/min_length": 41.25, "epoch": 3.644825018615041, "grad_norm": 1.1067492543224429, "kl": 0.2734375, "learning_rate": 7.156930328406267e-07, "loss": 0.007279522716999054, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.6770833488553762, "rewards/CineAccuracyORM/std": 0.24199935421347618, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2446, "train_speed(iter/s)": 0.028533 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 268.5, "completions/mean_length": 114.29166984558105, "completions/min_length": 39.25, "epoch": 3.6463142218912883, "grad_norm": 0.00844590181468103, "kl": 0.29345703125, "learning_rate": 7.154796511065912e-07, "loss": 0.0002929602051153779, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2447, "train_speed(iter/s)": 0.028534 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 270.5, "completions/mean_length": 120.23958778381348, "completions/min_length": 44.25, "epoch": 3.6478034251675355, "grad_norm": 1.7202624932979245, "kl": 0.280517578125, "learning_rate": 7.15266221165761e-07, "loss": 0.00860479474067688, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.11258216947317123, "rewards/CineAccuracyORM/mean": 0.7395833507180214, "rewards/CineAccuracyORM/std": 0.34280356764793396, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2448, "train_speed(iter/s)": 0.02853 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 248.75, "completions/mean_length": 116.92708587646484, "completions/min_length": 39.25, "epoch": 3.6492926284437823, "grad_norm": 1.76714789504242, "kl": 0.29150390625, "learning_rate": 7.15052743065884e-07, "loss": 0.014400212094187737, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2449, "train_speed(iter/s)": 0.028523 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 307.0, "completions/mean_length": 114.50000381469727, "completions/min_length": 45.5, "epoch": 3.65078183172003, "grad_norm": 0.020378028244800984, "kl": 0.30908203125, "learning_rate": 7.14839216854719e-07, "loss": 0.00030937910196371377, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2450, "train_speed(iter/s)": 0.028519 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 263.75, "completions/mean_length": 118.04167175292969, "completions/min_length": 40.75, "epoch": 3.652271034996277, "grad_norm": 0.012554599652624109, "kl": 0.28759765625, "learning_rate": 7.146256425800364e-07, "loss": 0.0002872683689929545, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2451, "train_speed(iter/s)": 0.028522 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 311.0, "completions/mean_length": 126.85416984558105, "completions/min_length": 43.75, "epoch": 3.653760238272524, "grad_norm": 1.31309852655943, "kl": 0.28369140625, "learning_rate": 7.14412020289616e-07, "loss": -0.01563883014023304, "memory(GiB)": 112.53, "reward": 1.5520833730697632, "reward_std": 0.07400632463395596, "rewards/CineAccuracyORM/mean": 0.5520833386108279, "rewards/CineAccuracyORM/std": 0.29910537227988243, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2452, "train_speed(iter/s)": 0.028518 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 255.5, "completions/mean_length": 119.62500190734863, "completions/min_length": 38.25, "epoch": 3.6552494415487713, "grad_norm": 0.5851292795826342, "kl": 0.28466796875, "learning_rate": 7.141983500312497e-07, "loss": 0.006888438947498798, "memory(GiB)": 112.53, "reward": 1.697916716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.697916679084301, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2453, "train_speed(iter/s)": 0.028518 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 324.25, "completions/mean_length": 132.39583778381348, "completions/min_length": 38.0, "epoch": 3.6567386448250185, "grad_norm": 0.8463978161695528, "kl": 0.264404296875, "learning_rate": 7.139846318527392e-07, "loss": -0.007273993920534849, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2454, "train_speed(iter/s)": 0.028516 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 295.5, "completions/mean_length": 114.94792175292969, "completions/min_length": 40.25, "epoch": 3.6582278481012658, "grad_norm": 1.8086013517057709, "kl": 0.28759765625, "learning_rate": 7.137708658018972e-07, "loss": -0.029075002297759056, "memory(GiB)": 112.53, "reward": 1.541666716337204, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.541666679084301, "rewards/CineAccuracyORM/std": 0.4681183323264122, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2455, "train_speed(iter/s)": 0.028517 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 322.0, "completions/mean_length": 131.91666984558105, "completions/min_length": 47.25, "epoch": 3.659717051377513, "grad_norm": 0.7867060989518707, "kl": 0.2685546875, "learning_rate": 7.135570519265472e-07, "loss": 0.0384182408452034, "memory(GiB)": 112.53, "reward": 1.7916666865348816, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.24807433784008026, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2456, "train_speed(iter/s)": 0.028517 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 504.75, "completions/mean_length": 155.35416984558105, "completions/min_length": 59.25, "epoch": 3.6612062546537603, "grad_norm": 0.862044097007613, "kl": 0.249755859375, "learning_rate": 7.133431902745233e-07, "loss": -0.008804412558674812, "memory(GiB)": 112.53, "reward": 1.7395834028720856, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.739583358168602, "rewards/CineAccuracyORM/std": 0.4121886007487774, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2457, "train_speed(iter/s)": 0.028515 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 351.5, "completions/mean_length": 148.25000762939453, "completions/min_length": 59.75, "epoch": 3.6626954579300075, "grad_norm": 0.4699661453785174, "kl": 0.26025390625, "learning_rate": 7.131292808936704e-07, "loss": -0.007087291218340397, "memory(GiB)": 112.53, "reward": 1.729166716337204, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.7395833507180214, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 0.9895833432674408, "rewards/Format/std": 0.05103103443980217, "step": 2458, "train_speed(iter/s)": 0.028514 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 405.75, "completions/mean_length": 147.7604217529297, "completions/min_length": 48.75, "epoch": 3.6641846612062547, "grad_norm": 1.123876334603416, "kl": 0.256103515625, "learning_rate": 7.129153238318439e-07, "loss": -0.007129611447453499, "memory(GiB)": 112.53, "reward": 1.7812500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7812500149011612, "rewards/CineAccuracyORM/std": 0.34448449313640594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2459, "train_speed(iter/s)": 0.028515 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 323.0, "completions/mean_length": 137.04166984558105, "completions/min_length": 40.25, "epoch": 3.665673864482502, "grad_norm": 0.007587813448014481, "kl": 0.273193359375, "learning_rate": 7.127013191369102e-07, "loss": 0.00027301174122840166, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2460, "train_speed(iter/s)": 0.028507 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 406.75, "completions/mean_length": 145.92708778381348, "completions/min_length": 45.0, "epoch": 3.6671630677587492, "grad_norm": 0.007078324244853235, "kl": 0.26220703125, "learning_rate": 7.124872668567457e-07, "loss": 0.00026184687158092856, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2461, "train_speed(iter/s)": 0.028502 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 299.75, "completions/mean_length": 140.1354217529297, "completions/min_length": 53.0, "epoch": 3.668652271034996, "grad_norm": 0.6090884474929192, "kl": 0.28076171875, "learning_rate": 7.12273167039238e-07, "loss": -0.025069452822208405, "memory(GiB)": 112.53, "reward": 1.8958333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2462, "train_speed(iter/s)": 0.028501 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 283.0, "completions/mean_length": 131.38541984558105, "completions/min_length": 42.5, "epoch": 3.6701414743112437, "grad_norm": 0.45046807075759254, "kl": 0.2822265625, "learning_rate": 7.120590197322853e-07, "loss": 0.006173049099743366, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2463, "train_speed(iter/s)": 0.028499 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 324.25, "completions/mean_length": 135.3333396911621, "completions/min_length": 34.75, "epoch": 3.6716306775874905, "grad_norm": 0.7506917531344729, "kl": 0.2568359375, "learning_rate": 7.118448249837965e-07, "loss": 0.014995116740465164, "memory(GiB)": 112.53, "reward": 1.8125000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2464, "train_speed(iter/s)": 0.028494 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 320.75, "completions/mean_length": 130.25000762939453, "completions/min_length": 51.75, "epoch": 3.6731198808637378, "grad_norm": 0.6886980116644589, "kl": 0.28369140625, "learning_rate": 7.116305828416907e-07, "loss": -0.027184665203094482, "memory(GiB)": 112.53, "reward": 1.760416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166865348816, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2465, "train_speed(iter/s)": 0.028493 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 360.25, "completions/mean_length": 154.21875381469727, "completions/min_length": 45.25, "epoch": 3.674609084139985, "grad_norm": 0.8685271164052784, "kl": 0.263671875, "learning_rate": 7.114162933538978e-07, "loss": 0.011890696361660957, "memory(GiB)": 112.53, "reward": 1.5729166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5729166716337204, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2466, "train_speed(iter/s)": 0.028489 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 351.0, "completions/mean_length": 129.8854217529297, "completions/min_length": 38.75, "epoch": 3.6760982874162322, "grad_norm": 0.005929059826629788, "kl": 0.273681640625, "learning_rate": 7.112019565683584e-07, "loss": 0.0002730593550950289, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2467, "train_speed(iter/s)": 0.028484 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 315.5, "completions/mean_length": 134.22916984558105, "completions/min_length": 40.25, "epoch": 3.6775874906924795, "grad_norm": 0.9361428387057837, "kl": 0.28466796875, "learning_rate": 7.109875725330237e-07, "loss": 0.016054989770054817, "memory(GiB)": 112.53, "reward": 1.5520833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.5520833432674408, "rewards/CineAccuracyORM/std": 0.4456155598163605, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2468, "train_speed(iter/s)": 0.028483 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 275.25, "completions/mean_length": 111.25000381469727, "completions/min_length": 39.25, "epoch": 3.6790766939687267, "grad_norm": 0.005454811261631096, "kl": 0.29296875, "learning_rate": 7.107731412958553e-07, "loss": 0.0002928444591816515, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2469, "train_speed(iter/s)": 0.028482 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 270.0, "completions/mean_length": 113.59375, "completions/min_length": 37.75, "epoch": 3.680565897244974, "grad_norm": 1.1810649751270226, "kl": 0.31396484375, "learning_rate": 7.105586629048255e-07, "loss": 0.010365579277276993, "memory(GiB)": 112.53, "reward": 1.7187500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500074505806, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2470, "train_speed(iter/s)": 0.028479 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 315.25, "completions/mean_length": 124.52083396911621, "completions/min_length": 37.5, "epoch": 3.682055100521221, "grad_norm": 0.870594463107221, "kl": 0.305908203125, "learning_rate": 7.10344137407917e-07, "loss": 0.013398019596934319, "memory(GiB)": 112.53, "reward": 1.8125000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2471, "train_speed(iter/s)": 0.028474 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 323.5, "completions/mean_length": 134.26042556762695, "completions/min_length": 36.75, "epoch": 3.6835443037974684, "grad_norm": 0.0074573712111676545, "kl": 0.266357421875, "learning_rate": 7.101295648531233e-07, "loss": 0.0002662819460965693, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2472, "train_speed(iter/s)": 0.02847 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 312.25, "completions/mean_length": 126.12500381469727, "completions/min_length": 47.75, "epoch": 3.6850335070737157, "grad_norm": 0.007789711984481489, "kl": 0.3046875, "learning_rate": 7.09914945288448e-07, "loss": 0.0003046095953322947, "memory(GiB)": 112.53, "reward": 1.5000000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2473, "train_speed(iter/s)": 0.028464 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 262.5, "completions/mean_length": 120.42708587646484, "completions/min_length": 35.25, "epoch": 3.686522710349963, "grad_norm": 0.006921841246048824, "kl": 0.27685546875, "learning_rate": 7.097002787619058e-07, "loss": 0.00027744119870476425, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2474, "train_speed(iter/s)": 0.028466 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 331.5, "completions/mean_length": 122.31250190734863, "completions/min_length": 36.75, "epoch": 3.6880119136262097, "grad_norm": 0.9873688755179394, "kl": 0.28369140625, "learning_rate": 7.094855653215214e-07, "loss": -0.010125668719410896, "memory(GiB)": 112.53, "reward": 1.8854166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.20484384894371033, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2475, "train_speed(iter/s)": 0.028465 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 309.5, "completions/mean_length": 120.48958778381348, "completions/min_length": 38.75, "epoch": 3.6895011169024574, "grad_norm": 0.006427303698513514, "kl": 0.29638671875, "learning_rate": 7.092708050153301e-07, "loss": 0.00029634631937369704, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2476, "train_speed(iter/s)": 0.028462 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 497.5, "completions/mean_length": 137.62500381469727, "completions/min_length": 44.25, "epoch": 3.690990320178704, "grad_norm": 0.8246664088625408, "kl": 0.265625, "learning_rate": 7.090559978913779e-07, "loss": -0.011436025612056255, "memory(GiB)": 112.53, "reward": 1.8645833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8645833432674408, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2477, "train_speed(iter/s)": 0.028453 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 300.25, "completions/mean_length": 119.17708587646484, "completions/min_length": 41.5, "epoch": 3.6924795234549515, "grad_norm": 0.004185621984896653, "kl": 0.27880859375, "learning_rate": 7.088411439977211e-07, "loss": 0.0002788404526654631, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2478, "train_speed(iter/s)": 0.028449 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 294.75, "completions/mean_length": 118.71875190734863, "completions/min_length": 42.25, "epoch": 3.6939687267311987, "grad_norm": 0.6766616512416646, "kl": 0.28955078125, "learning_rate": 7.086262433824266e-07, "loss": -0.00790808629244566, "memory(GiB)": 112.53, "reward": 1.6562500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2479, "train_speed(iter/s)": 0.028445 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 260.25, "completions/mean_length": 106.91666984558105, "completions/min_length": 42.0, "epoch": 3.695457930007446, "grad_norm": 1.0013840385529362, "kl": 0.2919921875, "learning_rate": 7.084112960935715e-07, "loss": -0.002941807731986046, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2480, "train_speed(iter/s)": 0.028445 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 301.25, "completions/mean_length": 120.00000381469727, "completions/min_length": 28.75, "epoch": 3.696947133283693, "grad_norm": 0.005386060224599711, "kl": 0.2880859375, "learning_rate": 7.081963021792434e-07, "loss": 0.000288421637378633, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2481, "train_speed(iter/s)": 0.028438 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 316.75, "completions/mean_length": 111.28125381469727, "completions/min_length": 43.0, "epoch": 3.6984363365599404, "grad_norm": 0.004024248604136882, "kl": 0.277099609375, "learning_rate": 7.079812616875407e-07, "loss": 0.0002769853745121509, "memory(GiB)": 112.53, "reward": 1.4166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.4166666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2482, "train_speed(iter/s)": 0.02844 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 277.5, "completions/mean_length": 119.5312557220459, "completions/min_length": 46.5, "epoch": 3.6999255398361877, "grad_norm": 0.0046624025612145405, "kl": 0.2900390625, "learning_rate": 7.077661746665719e-07, "loss": 0.0002901647239923477, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2483, "train_speed(iter/s)": 0.028436 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 383.0, "completions/mean_length": 121.33333778381348, "completions/min_length": 31.75, "epoch": 3.701414743112435, "grad_norm": 0.00598137961734578, "kl": 0.27783203125, "learning_rate": 7.075510411644558e-07, "loss": 0.00027748511638492346, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2484, "train_speed(iter/s)": 0.028434 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 345.25, "completions/mean_length": 121.07291793823242, "completions/min_length": 42.0, "epoch": 3.702903946388682, "grad_norm": 1.381685478434501, "kl": 0.2626953125, "learning_rate": 7.073358612293218e-07, "loss": -0.019192839041352272, "memory(GiB)": 112.53, "reward": 1.8020833432674408, "reward_std": 0.07400632463395596, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.21880721300840378, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2485, "train_speed(iter/s)": 0.028435 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.0, "completions/mean_length": 108.15625381469727, "completions/min_length": 40.75, "epoch": 3.7043931496649294, "grad_norm": 0.5140355899295717, "kl": 0.294921875, "learning_rate": 7.071206349093096e-07, "loss": -0.02146279439330101, "memory(GiB)": 112.53, "reward": 1.6562500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500074505806, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2486, "train_speed(iter/s)": 0.02843 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 300.5, "completions/mean_length": 130.57291793823242, "completions/min_length": 45.75, "epoch": 3.7058823529411766, "grad_norm": 0.006318953926592468, "kl": 0.272705078125, "learning_rate": 7.069053622525696e-07, "loss": 0.0002724897349253297, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2487, "train_speed(iter/s)": 0.028426 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 382.75, "completions/mean_length": 133.92708587646484, "completions/min_length": 47.0, "epoch": 3.7073715562174234, "grad_norm": 0.008041581267715325, "kl": 0.283447265625, "learning_rate": 7.066900433072619e-07, "loss": 0.00028363047749735415, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2488, "train_speed(iter/s)": 0.028421 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 281.75, "completions/mean_length": 107.42708587646484, "completions/min_length": 33.25, "epoch": 3.708860759493671, "grad_norm": 3.3154644329569405, "kl": 0.2890625, "learning_rate": 7.064746781215577e-07, "loss": -0.005380622576922178, "memory(GiB)": 112.53, "reward": 1.7916666865348816, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.3284776881337166, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2489, "train_speed(iter/s)": 0.028421 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.25, "completions/mean_length": 107.25000381469727, "completions/min_length": 35.75, "epoch": 3.710349962769918, "grad_norm": 1.5782835436362623, "kl": 0.29443359375, "learning_rate": 7.06259266743638e-07, "loss": 0.003860244993120432, "memory(GiB)": 112.53, "reward": 1.7812500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7812500074505806, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2490, "train_speed(iter/s)": 0.028424 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 310.75, "completions/mean_length": 132.48958587646484, "completions/min_length": 35.5, "epoch": 3.711839166046165, "grad_norm": 1.0335765805070005, "kl": 0.318359375, "learning_rate": 7.060438092216941e-07, "loss": 0.029626552015542984, "memory(GiB)": 112.53, "reward": 1.822916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2491, "train_speed(iter/s)": 0.02842 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 247.0, "completions/mean_length": 111.29166984558105, "completions/min_length": 31.5, "epoch": 3.7133283693224124, "grad_norm": 0.004354688439603917, "kl": 0.28955078125, "learning_rate": 7.058283056039283e-07, "loss": 0.0002896953374147415, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2492, "train_speed(iter/s)": 0.02842 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 312.75, "completions/mean_length": 118.89583587646484, "completions/min_length": 34.0, "epoch": 3.7148175725986596, "grad_norm": 0.03639653716758622, "kl": 0.29248046875, "learning_rate": 7.056127559385525e-07, "loss": 0.00029209820786491036, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2493, "train_speed(iter/s)": 0.028416 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.0, "completions/mean_length": 109.14583396911621, "completions/min_length": 44.25, "epoch": 3.716306775874907, "grad_norm": 1.5365439876523694, "kl": 0.3017578125, "learning_rate": 7.053971602737891e-07, "loss": 0.008319512940943241, "memory(GiB)": 112.53, "reward": 1.7291666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7291666716337204, "rewards/CineAccuracyORM/std": 0.07058246433734894, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2494, "train_speed(iter/s)": 0.028419 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 353.5, "completions/mean_length": 128.73958778381348, "completions/min_length": 35.5, "epoch": 3.717795979151154, "grad_norm": 0.003799564240651035, "kl": 0.252197265625, "learning_rate": 7.05181518657871e-07, "loss": 0.000251710822340101, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2495, "train_speed(iter/s)": 0.028414 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 306.25, "completions/mean_length": 118.06250381469727, "completions/min_length": 38.5, "epoch": 3.7192851824274014, "grad_norm": 1.312557597191531, "kl": 0.30419921875, "learning_rate": 7.049658311390412e-07, "loss": 0.013565785251557827, "memory(GiB)": 112.53, "reward": 1.8854166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.20484384894371033, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2496, "train_speed(iter/s)": 0.028407 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 305.0, "completions/mean_length": 127.63542175292969, "completions/min_length": 45.0, "epoch": 3.7207743857036486, "grad_norm": 1.4366080005602666, "kl": 0.27783203125, "learning_rate": 7.047500977655531e-07, "loss": 0.037996888160705566, "memory(GiB)": 112.53, "reward": 1.7083334028720856, "reward_std": 0.07715167850255966, "rewards/CineAccuracyORM/mean": 0.7083333507180214, "rewards/CineAccuracyORM/std": 0.381936639547348, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2497, "train_speed(iter/s)": 0.028406 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 304.5, "completions/mean_length": 127.55208778381348, "completions/min_length": 42.75, "epoch": 3.722263588979896, "grad_norm": 0.5404301189661858, "kl": 0.28466796875, "learning_rate": 7.0453431858567e-07, "loss": -0.012481740675866604, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.822916679084301, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2498, "train_speed(iter/s)": 0.028408 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 312.25, "completions/mean_length": 134.85417366027832, "completions/min_length": 42.0, "epoch": 3.723752792256143, "grad_norm": 1.1018361085782082, "kl": 0.24560546875, "learning_rate": 7.043184936476658e-07, "loss": 0.043793193995952606, "memory(GiB)": 112.53, "reward": 1.572916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.572916679084301, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2499, "train_speed(iter/s)": 0.028404 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 295.0, "completions/mean_length": 128.08333778381348, "completions/min_length": 43.75, "epoch": 3.7252419955323903, "grad_norm": 1.868915584484314, "kl": 0.28271484375, "learning_rate": 7.041026229998246e-07, "loss": 0.00591763760894537, "memory(GiB)": 112.53, "reward": 1.729166716337204, "reward_std": 0.08625819534063339, "rewards/CineAccuracyORM/mean": 0.729166679084301, "rewards/CineAccuracyORM/std": 0.35134297609329224, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2500, "train_speed(iter/s)": 0.028403 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 291.0, "completions/mean_length": 112.75000381469727, "completions/min_length": 37.25, "epoch": 3.726731198808637, "grad_norm": 1.7480629359068927, "kl": 0.29345703125, "learning_rate": 7.038867066904406e-07, "loss": 0.0667198896408081, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.14204494282603264, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.2776889093220234, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2501, "train_speed(iter/s)": 0.028387 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 318.5, "completions/mean_length": 123.81250381469727, "completions/min_length": 49.25, "epoch": 3.728220402084885, "grad_norm": 0.00462802925038773, "kl": 0.277587890625, "learning_rate": 7.036707447678184e-07, "loss": 0.0002781150978989899, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2502, "train_speed(iter/s)": 0.028385 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 335.0, "completions/mean_length": 118.03125381469727, "completions/min_length": 39.75, "epoch": 3.7297096053611316, "grad_norm": 0.794479106705138, "kl": 0.28759765625, "learning_rate": 7.034547372802726e-07, "loss": 0.02402826026082039, "memory(GiB)": 112.53, "reward": 1.8854166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.12724433839321136, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2503, "train_speed(iter/s)": 0.028387 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 415.25, "completions/mean_length": 124.71875381469727, "completions/min_length": 33.25, "epoch": 3.731198808637379, "grad_norm": 0.004012840429219288, "kl": 0.27587890625, "learning_rate": 7.032386842761281e-07, "loss": 0.00027555166161619127, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2504, "train_speed(iter/s)": 0.028381 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 396.25, "completions/mean_length": 121.39583587646484, "completions/min_length": 42.5, "epoch": 3.732688011913626, "grad_norm": 0.004181006090417081, "kl": 0.27490234375, "learning_rate": 7.0302258580372e-07, "loss": 0.00027417088858783245, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2505, "train_speed(iter/s)": 0.028382 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 272.0, "completions/mean_length": 119.30208969116211, "completions/min_length": 36.0, "epoch": 3.7341772151898733, "grad_norm": 1.908803223061922, "kl": 0.3125, "learning_rate": 7.028064419113934e-07, "loss": -0.019704528152942657, "memory(GiB)": 112.53, "reward": 1.6770834028720856, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.677083358168602, "rewards/CineAccuracyORM/std": 0.4177052788436413, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2506, "train_speed(iter/s)": 0.028384 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 323.5, "completions/mean_length": 112.83333587646484, "completions/min_length": 49.75, "epoch": 3.7356664184661206, "grad_norm": 0.8764795800735135, "kl": 0.27392578125, "learning_rate": 7.025902526475038e-07, "loss": 0.0020633842796087265, "memory(GiB)": 112.53, "reward": 1.6458333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6458333507180214, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2507, "train_speed(iter/s)": 0.02838 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 293.0, "completions/mean_length": 112.59375381469727, "completions/min_length": 38.0, "epoch": 3.737155621742368, "grad_norm": 0.6197543836688109, "kl": 0.28466796875, "learning_rate": 7.023740180604169e-07, "loss": 0.003701531095430255, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2508, "train_speed(iter/s)": 0.028379 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 223.0, "completions/mean_length": 103.67708587646484, "completions/min_length": 30.75, "epoch": 3.738644825018615, "grad_norm": 0.5476045885878754, "kl": 0.29736328125, "learning_rate": 7.021577381985081e-07, "loss": -0.0225893035531044, "memory(GiB)": 112.53, "reward": 1.6562500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500074505806, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2509, "train_speed(iter/s)": 0.028382 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 323.75, "completions/mean_length": 123.47916984558105, "completions/min_length": 32.5, "epoch": 3.7401340282948623, "grad_norm": 0.005694528300395343, "kl": 0.272705078125, "learning_rate": 7.019414131101633e-07, "loss": 0.00027311069425195456, "memory(GiB)": 112.53, "reward": 1.3333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.3333333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2510, "train_speed(iter/s)": 0.028381 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 345.5, "completions/mean_length": 119.67708587646484, "completions/min_length": 43.0, "epoch": 3.7416232315711095, "grad_norm": 0.0067901481741630566, "kl": 0.28271484375, "learning_rate": 7.017250428437786e-07, "loss": 0.00028340014978311956, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2511, "train_speed(iter/s)": 0.028376 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 294.25, "completions/mean_length": 112.18750381469727, "completions/min_length": 42.25, "epoch": 3.743112434847357, "grad_norm": 1.953476516926959, "kl": 0.279052734375, "learning_rate": 7.0150862744776e-07, "loss": 0.005749819800257683, "memory(GiB)": 112.53, "reward": 1.7812500596046448, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.7812500149011612, "rewards/CineAccuracyORM/std": 0.34448449313640594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2512, "train_speed(iter/s)": 0.028377 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 286.5, "completions/mean_length": 124.30208778381348, "completions/min_length": 34.25, "epoch": 3.744601638123604, "grad_norm": 0.603563874424212, "kl": 0.64208984375, "learning_rate": 7.012921669705234e-07, "loss": -0.00954081304371357, "memory(GiB)": 112.53, "reward": 1.6354166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6354166716337204, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2513, "train_speed(iter/s)": 0.028375 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 256.75, "completions/mean_length": 116.04167175292969, "completions/min_length": 45.75, "epoch": 3.746090841399851, "grad_norm": 0.005242471194945508, "kl": 0.28125, "learning_rate": 7.010756614604952e-07, "loss": 0.00028091680724173784, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2514, "train_speed(iter/s)": 0.028375 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 269.25, "completions/mean_length": 111.10416793823242, "completions/min_length": 40.5, "epoch": 3.7475800446760985, "grad_norm": 1.351949304948073, "kl": 0.29345703125, "learning_rate": 7.008591109661117e-07, "loss": -0.02816103957593441, "memory(GiB)": 112.53, "reward": 1.8125000298023224, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.32092025876045227, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2515, "train_speed(iter/s)": 0.028374 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 293.25, "completions/mean_length": 93.43750190734863, "completions/min_length": 28.0, "epoch": 3.7490692479523453, "grad_norm": 0.003998842415673659, "kl": 0.31494140625, "learning_rate": 7.006425155358195e-07, "loss": 0.00031454834970645607, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2516, "train_speed(iter/s)": 0.028374 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 298.75, "completions/mean_length": 113.92708396911621, "completions/min_length": 32.25, "epoch": 3.7505584512285925, "grad_norm": 0.00431981510113782, "kl": 0.30029296875, "learning_rate": 7.004258752180745e-07, "loss": 0.0002998544368892908, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2517, "train_speed(iter/s)": 0.02837 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 355.5, "completions/mean_length": 119.51041793823242, "completions/min_length": 38.0, "epoch": 3.75204765450484, "grad_norm": 1.183945403832999, "kl": 0.30859375, "learning_rate": 7.002091900613435e-07, "loss": -0.010061291977763176, "memory(GiB)": 112.53, "reward": 1.5416666865348816, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.541666679084301, "rewards/CineAccuracyORM/std": 0.15474381670355797, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2518, "train_speed(iter/s)": 0.028368 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 345.25, "completions/mean_length": 133.9166717529297, "completions/min_length": 39.75, "epoch": 3.753536857781087, "grad_norm": 0.9653860905233628, "kl": 0.27685546875, "learning_rate": 6.999924601141029e-07, "loss": 0.0001534968614578247, "memory(GiB)": 112.53, "reward": 1.697916716337204, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.6979166865348816, "rewards/CineAccuracyORM/std": 0.3735327273607254, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2519, "train_speed(iter/s)": 0.028356 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 344.0, "completions/mean_length": 117.13541984558105, "completions/min_length": 34.75, "epoch": 3.7550260610573343, "grad_norm": 1.5366267317463167, "kl": 0.30517578125, "learning_rate": 6.997756854248391e-07, "loss": -0.011326093226671219, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.08625819534063339, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.45247404277324677, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2520, "train_speed(iter/s)": 0.028358 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 425.5, "completions/mean_length": 141.1354217529297, "completions/min_length": 37.25, "epoch": 3.7565152643335815, "grad_norm": 0.07951910166050431, "kl": 0.289794921875, "learning_rate": 6.99558866042049e-07, "loss": 0.00028975738678127527, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2521, "train_speed(iter/s)": 0.028353 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 366.0, "completions/mean_length": 132.03125381469727, "completions/min_length": 38.5, "epoch": 3.7580044676098288, "grad_norm": 0.004068946444714615, "kl": 0.257568359375, "learning_rate": 6.993420020142388e-07, "loss": 0.0002581423323135823, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2522, "train_speed(iter/s)": 0.028348 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 309.5, "completions/mean_length": 114.68750190734863, "completions/min_length": 39.0, "epoch": 3.759493670886076, "grad_norm": 1.1226266844602928, "kl": 0.2880859375, "learning_rate": 6.99125093389925e-07, "loss": 0.01845644786953926, "memory(GiB)": 112.53, "reward": 1.90625, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.12363383919000626, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2523, "train_speed(iter/s)": 0.02835 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 275.0, "completions/mean_length": 104.22916793823242, "completions/min_length": 39.75, "epoch": 3.7609828741623232, "grad_norm": 0.0050561992078135875, "kl": 0.3173828125, "learning_rate": 6.98908140217634e-07, "loss": 0.000317090394673869, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2524, "train_speed(iter/s)": 0.028352 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 306.0, "completions/mean_length": 121.05208396911621, "completions/min_length": 36.0, "epoch": 3.7624720774385705, "grad_norm": 0.003934393649684306, "kl": 0.277099609375, "learning_rate": 6.986911425459027e-07, "loss": 0.0002767058613244444, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2525, "train_speed(iter/s)": 0.028348 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 231.75, "completions/mean_length": 101.98958587646484, "completions/min_length": 41.25, "epoch": 3.7639612807148177, "grad_norm": 2.7802062670869123, "kl": 0.328125, "learning_rate": 6.984741004232769e-07, "loss": -0.030172359198331833, "memory(GiB)": 112.53, "reward": 1.5208333730697632, "reward_std": 0.11116771958768368, "rewards/CineAccuracyORM/mean": 0.5208333507180214, "rewards/CineAccuracyORM/std": 0.47428806871175766, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2526, "train_speed(iter/s)": 0.028351 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 321.25, "completions/mean_length": 110.06250190734863, "completions/min_length": 33.5, "epoch": 3.7654504839910645, "grad_norm": 0.003939201502772074, "kl": 0.28125, "learning_rate": 6.982570138983135e-07, "loss": 0.00028135915636084974, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2527, "train_speed(iter/s)": 0.028353 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 301.5, "completions/mean_length": 124.00000381469727, "completions/min_length": 29.0, "epoch": 3.766939687267312, "grad_norm": 0.003918784743565257, "kl": 0.27978515625, "learning_rate": 6.980398830195784e-07, "loss": 0.00027992454124614596, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2528, "train_speed(iter/s)": 0.028349 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 358.25, "completions/mean_length": 130.46875381469727, "completions/min_length": 41.25, "epoch": 3.768428890543559, "grad_norm": 0.004046660952265116, "kl": 0.25390625, "learning_rate": 6.97822707835648e-07, "loss": 0.0002537749824114144, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2529, "train_speed(iter/s)": 0.028347 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 390.5, "completions/mean_length": 148.92708587646484, "completions/min_length": 48.75, "epoch": 3.7699180938198062, "grad_norm": 0.0036048602774939936, "kl": 0.262939453125, "learning_rate": 6.976054883951084e-07, "loss": 0.0002632133837323636, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2530, "train_speed(iter/s)": 0.028339 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 305.5, "completions/mean_length": 125.5937557220459, "completions/min_length": 42.5, "epoch": 3.7714072970960535, "grad_norm": 0.003685367521386916, "kl": 0.280029296875, "learning_rate": 6.973882247465555e-07, "loss": 0.00028007844230160117, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2531, "train_speed(iter/s)": 0.028338 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 336.25, "completions/mean_length": 112.63541984558105, "completions/min_length": 33.75, "epoch": 3.7728965003723007, "grad_norm": 1.7905616444949868, "kl": 0.35009765625, "learning_rate": 6.971709169385952e-07, "loss": 0.000949371256865561, "memory(GiB)": 112.53, "reward": 1.6875000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000149011612, "rewards/CineAccuracyORM/std": 0.4717390313744545, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2532, "train_speed(iter/s)": 0.028334 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 257.5, "completions/mean_length": 118.72916793823242, "completions/min_length": 40.0, "epoch": 3.774385703648548, "grad_norm": 0.004197971106704319, "kl": 0.265625, "learning_rate": 6.969535650198435e-07, "loss": 0.00026563205756247044, "memory(GiB)": 112.53, "reward": 1.4166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.4166666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2533, "train_speed(iter/s)": 0.028334 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 291.75, "completions/mean_length": 110.22916793823242, "completions/min_length": 44.0, "epoch": 3.775874906924795, "grad_norm": 0.003790256806612445, "kl": 0.29248046875, "learning_rate": 6.967361690389258e-07, "loss": 0.00029309222009032965, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2534, "train_speed(iter/s)": 0.028333 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 264.5, "completions/mean_length": 114.625, "completions/min_length": 35.75, "epoch": 3.7773641102010425, "grad_norm": 0.876481664472021, "kl": 0.29736328125, "learning_rate": 6.965187290444778e-07, "loss": 0.027838319540023804, "memory(GiB)": 112.53, "reward": 1.6458333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6458333507180214, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2535, "train_speed(iter/s)": 0.02833 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 291.5, "completions/mean_length": 120.72916793823242, "completions/min_length": 20.25, "epoch": 3.7788533134772897, "grad_norm": 0.7974903565733389, "kl": 0.270751953125, "learning_rate": 6.963012450851449e-07, "loss": -0.028566457331180573, "memory(GiB)": 112.53, "reward": 1.7187500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500149011612, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2536, "train_speed(iter/s)": 0.028326 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 290.25, "completions/mean_length": 117.82292175292969, "completions/min_length": 28.0, "epoch": 3.780342516753537, "grad_norm": 0.0034159134754185544, "kl": 0.28759765625, "learning_rate": 6.960837172095821e-07, "loss": 0.00028722936986014247, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2537, "train_speed(iter/s)": 0.028329 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 328.0, "completions/mean_length": 133.2916717529297, "completions/min_length": 32.5, "epoch": 3.781831720029784, "grad_norm": 0.004020777438959602, "kl": 0.25830078125, "learning_rate": 6.958661454664545e-07, "loss": 0.0002581448061391711, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2538, "train_speed(iter/s)": 0.028326 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 287.0, "completions/mean_length": 115.11458969116211, "completions/min_length": 37.0, "epoch": 3.7833209233060314, "grad_norm": 0.9478386160308996, "kl": 0.2861328125, "learning_rate": 6.95648529904437e-07, "loss": 0.0013671116903424263, "memory(GiB)": 112.53, "reward": 1.5312500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.5312500149011612, "rewards/CineAccuracyORM/std": 0.4884019047021866, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2539, "train_speed(iter/s)": 0.028328 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 339.0, "completions/mean_length": 123.73958587646484, "completions/min_length": 37.25, "epoch": 3.7848101265822782, "grad_norm": 0.5396691756482487, "kl": 0.322509765625, "learning_rate": 6.954308705722142e-07, "loss": 0.01508212648332119, "memory(GiB)": 112.53, "reward": 1.5937500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500223517418, "rewards/CineAccuracyORM/std": 0.47723397612571716, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2540, "train_speed(iter/s)": 0.028323 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 312.5, "completions/mean_length": 119.19792175292969, "completions/min_length": 39.0, "epoch": 3.786299329858526, "grad_norm": 0.9640467686613763, "kl": 0.2666015625, "learning_rate": 6.952131675184805e-07, "loss": 0.04120689630508423, "memory(GiB)": 112.53, "reward": 1.7187500298023224, "reward_std": 0.06803862750530243, "rewards/CineAccuracyORM/mean": 0.7187500074505806, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2541, "train_speed(iter/s)": 0.028325 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 313.25, "completions/mean_length": 121.75000381469727, "completions/min_length": 30.0, "epoch": 3.7877885331347727, "grad_norm": 0.5439831111951926, "kl": 0.287109375, "learning_rate": 6.949954207919401e-07, "loss": -0.010435860604047775, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2542, "train_speed(iter/s)": 0.028324 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 350.25, "completions/mean_length": 116.54167175292969, "completions/min_length": 42.75, "epoch": 3.78927773641102, "grad_norm": 1.078801683862483, "kl": 0.29345703125, "learning_rate": 6.947776304413071e-07, "loss": -0.0029111276380717754, "memory(GiB)": 112.53, "reward": 1.8958333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2543, "train_speed(iter/s)": 0.028323 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 379.0, "completions/mean_length": 122.16667175292969, "completions/min_length": 34.0, "epoch": 3.790766939687267, "grad_norm": 1.2232608969074812, "kl": 0.2763671875, "learning_rate": 6.94559796515305e-07, "loss": 0.0561700165271759, "memory(GiB)": 112.53, "reward": 1.6458333730697632, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.6562500074505806, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 0.9895833432674408, "rewards/Format/std": 0.05103103443980217, "step": 2544, "train_speed(iter/s)": 0.02832 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 304.75, "completions/mean_length": 122.8125057220459, "completions/min_length": 30.5, "epoch": 3.7922561429635144, "grad_norm": 0.003504239426329126, "kl": 0.28564453125, "learning_rate": 6.943419190626675e-07, "loss": 0.0002864935086108744, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2545, "train_speed(iter/s)": 0.028319 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 301.5, "completions/mean_length": 103.66666984558105, "completions/min_length": 30.75, "epoch": 3.7937453462397617, "grad_norm": 0.808345130629303, "kl": 0.31689453125, "learning_rate": 6.941239981321378e-07, "loss": 0.011210191994905472, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.5104166865348816, "rewards/CineAccuracyORM/std": 0.47723397612571716, "rewards/Format/mean": 0.9895833432674408, "rewards/Format/std": 0.05103103443980217, "step": 2546, "train_speed(iter/s)": 0.028322 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 348.0, "completions/mean_length": 125.77083587646484, "completions/min_length": 30.25, "epoch": 3.795234549516009, "grad_norm": 0.003170628031331294, "kl": 0.28662109375, "learning_rate": 6.93906033772469e-07, "loss": 0.00028623230173252523, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2547, "train_speed(iter/s)": 0.028317 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 352.25, "completions/mean_length": 124.98958587646484, "completions/min_length": 28.0, "epoch": 3.796723752792256, "grad_norm": 1.4482470729732742, "kl": 0.283203125, "learning_rate": 6.936880260324233e-07, "loss": 0.0007185991271398962, "memory(GiB)": 112.53, "reward": 1.6041667461395264, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6041666865348816, "rewards/CineAccuracyORM/std": 0.4870600998401642, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2548, "train_speed(iter/s)": 0.028311 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 308.5, "completions/mean_length": 122.83333778381348, "completions/min_length": 34.0, "epoch": 3.7982129560685034, "grad_norm": 0.7080759977055786, "kl": 0.28125, "learning_rate": 6.934699749607735e-07, "loss": 0.010614747181534767, "memory(GiB)": 112.53, "reward": 1.8125, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125, "rewards/CineAccuracyORM/std": 0.11058146506547928, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2549, "train_speed(iter/s)": 0.02831 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 354.5, "completions/mean_length": 121.58333587646484, "completions/min_length": 35.75, "epoch": 3.7997021593447506, "grad_norm": 0.0036515412605860106, "kl": 0.28369140625, "learning_rate": 6.932518806063016e-07, "loss": 0.0002834385959431529, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2550, "train_speed(iter/s)": 0.028312 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 244.25, "completions/mean_length": 105.90625381469727, "completions/min_length": 25.75, "epoch": 3.801191362620998, "grad_norm": 0.0034070764589483908, "kl": 0.2998046875, "learning_rate": 6.930337430177991e-07, "loss": 0.0002993041998706758, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2551, "train_speed(iter/s)": 0.028314 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 403.75, "completions/mean_length": 131.48958587646484, "completions/min_length": 44.25, "epoch": 3.802680565897245, "grad_norm": 1.7768871963084751, "kl": 0.2861328125, "learning_rate": 6.928155622440679e-07, "loss": -0.007279825862497091, "memory(GiB)": 112.53, "reward": 1.7916666865348816, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.791666679084301, "rewards/CineAccuracyORM/std": 0.2155592292547226, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2552, "train_speed(iter/s)": 0.028306 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 401.25, "completions/mean_length": 129.58333587646484, "completions/min_length": 36.75, "epoch": 3.804169769173492, "grad_norm": 0.7699318249006948, "kl": 0.2822265625, "learning_rate": 6.925973383339185e-07, "loss": 0.015216847881674767, "memory(GiB)": 112.53, "reward": 1.5208334028720856, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.5208333507180214, "rewards/CineAccuracyORM/std": 0.4870600998401642, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2553, "train_speed(iter/s)": 0.028298 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 263.75, "completions/mean_length": 104.26041984558105, "completions/min_length": 27.5, "epoch": 3.8056589724497396, "grad_norm": 0.6682878724530479, "kl": 0.34033203125, "learning_rate": 6.923790713361722e-07, "loss": 0.009300360456109047, "memory(GiB)": 112.53, "reward": 1.5520833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.5520833432674408, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2554, "train_speed(iter/s)": 0.028297 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 295.0, "completions/mean_length": 121.79166984558105, "completions/min_length": 38.0, "epoch": 3.8071481757259864, "grad_norm": 0.13384558523152795, "kl": 0.3896484375, "learning_rate": 6.921607612996591e-07, "loss": 0.0003896821872331202, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2555, "train_speed(iter/s)": 0.028296 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 303.75, "completions/mean_length": 108.02083587646484, "completions/min_length": 30.75, "epoch": 3.8086373790022336, "grad_norm": 0.003838394324480342, "kl": 0.314453125, "learning_rate": 6.91942408273219e-07, "loss": 0.0003145368827972561, "memory(GiB)": 112.53, "reward": 1.5000000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2556, "train_speed(iter/s)": 0.028292 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 510.25, "completions/mean_length": 120.44791793823242, "completions/min_length": 27.25, "epoch": 3.810126582278481, "grad_norm": 0.9220527360319201, "kl": 0.2763671875, "learning_rate": 6.917240123057019e-07, "loss": -0.06073760986328125, "memory(GiB)": 112.53, "reward": 1.6875000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000149011612, "rewards/CineAccuracyORM/std": 0.4717390313744545, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2557, "train_speed(iter/s)": 0.028289 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 381.0, "completions/mean_length": 122.16666793823242, "completions/min_length": 32.25, "epoch": 3.811615785554728, "grad_norm": 0.751616844750644, "kl": 0.28515625, "learning_rate": 6.915055734459668e-07, "loss": 0.006124132312834263, "memory(GiB)": 112.53, "reward": 1.510416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.510416679084301, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2558, "train_speed(iter/s)": 0.028287 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 254.5, "completions/mean_length": 100.85416793823242, "completions/min_length": 30.0, "epoch": 3.8131049888309754, "grad_norm": 1.6373349134885646, "kl": 0.3017578125, "learning_rate": 6.912870917428827e-07, "loss": 0.006248092278838158, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2559, "train_speed(iter/s)": 0.02828 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 257.75, "completions/mean_length": 100.76041984558105, "completions/min_length": 32.5, "epoch": 3.8145941921072226, "grad_norm": 0.8047040298345266, "kl": 0.3017578125, "learning_rate": 6.910685672453279e-07, "loss": -0.014288249425590038, "memory(GiB)": 112.53, "reward": 1.5625000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.5625000223517418, "rewards/CineAccuracyORM/std": 0.4870600998401642, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2560, "train_speed(iter/s)": 0.028282 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 263.5, "completions/mean_length": 93.72916793823242, "completions/min_length": 28.0, "epoch": 3.81608339538347, "grad_norm": 0.6474410562421984, "kl": 0.32861328125, "learning_rate": 6.908500000021904e-07, "loss": 0.012788351625204086, "memory(GiB)": 112.53, "reward": 1.6875000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000074505806, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2561, "train_speed(iter/s)": 0.028282 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 332.0, "completions/mean_length": 111.59375190734863, "completions/min_length": 35.0, "epoch": 3.817572598659717, "grad_norm": 2.0406573502679426, "kl": 0.2802734375, "learning_rate": 6.906313900623677e-07, "loss": -0.03645581007003784, "memory(GiB)": 112.53, "reward": 1.6145833730697632, "reward_std": 0.1322161816060543, "rewards/CineAccuracyORM/mean": 0.6145833432674408, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2562, "train_speed(iter/s)": 0.028281 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 250.0, "completions/mean_length": 103.23958396911621, "completions/min_length": 28.25, "epoch": 3.8190618019359643, "grad_norm": 1.2463675183408476, "kl": 0.291748046875, "learning_rate": 6.904127374747668e-07, "loss": 0.03501409292221069, "memory(GiB)": 112.53, "reward": 1.7812500298023224, "reward_std": 0.06803862750530243, "rewards/CineAccuracyORM/mean": 0.7812500074505806, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2563, "train_speed(iter/s)": 0.028282 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 293.75, "completions/mean_length": 107.63541793823242, "completions/min_length": 27.75, "epoch": 3.8205510052122116, "grad_norm": 0.9804275159066658, "kl": 0.31982421875, "learning_rate": 6.901940422883046e-07, "loss": 0.018551820889115334, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.06803862750530243, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 0.9895833432674408, "rewards/Format/std": 0.05103103443980217, "step": 2564, "train_speed(iter/s)": 0.028281 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 220.5, "completions/mean_length": 89.64583587646484, "completions/min_length": 38.5, "epoch": 3.822040208488459, "grad_norm": 0.0035055620249861896, "kl": 0.318359375, "learning_rate": 6.899753045519072e-07, "loss": 0.0003186342946719378, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2565, "train_speed(iter/s)": 0.028281 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 258.0, "completions/mean_length": 102.05208587646484, "completions/min_length": 33.75, "epoch": 3.8235294117647056, "grad_norm": 2.161923257351333, "kl": 0.32666015625, "learning_rate": 6.897565243145102e-07, "loss": 0.0035335819702595472, "memory(GiB)": 112.53, "reward": 1.5520834028720856, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.5520833507180214, "rewards/CineAccuracyORM/std": 0.4884019047021866, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2566, "train_speed(iter/s)": 0.028281 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 293.25, "completions/mean_length": 96.42708587646484, "completions/min_length": 29.0, "epoch": 3.8250186150409533, "grad_norm": 2.5890902217855762, "kl": 0.310546875, "learning_rate": 6.895377016250588e-07, "loss": -0.009492090903222561, "memory(GiB)": 112.53, "reward": 1.84375, "reward_std": 0.0876726396381855, "rewards/CineAccuracyORM/mean": 0.84375, "rewards/CineAccuracyORM/std": 0.21214647591114044, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2567, "train_speed(iter/s)": 0.028281 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.75, "completions/mean_length": 93.72916793823242, "completions/min_length": 35.25, "epoch": 3.8265078183172, "grad_norm": 1.7190567181399887, "kl": 0.3154296875, "learning_rate": 6.893188365325077e-07, "loss": -0.015849407762289047, "memory(GiB)": 112.53, "reward": 1.7083334028720856, "reward_std": 0.11116771958768368, "rewards/CineAccuracyORM/mean": 0.708333358168602, "rewards/CineAccuracyORM/std": 0.4190470837056637, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2568, "train_speed(iter/s)": 0.028284 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 321.0, "completions/mean_length": 110.72916793823242, "completions/min_length": 25.75, "epoch": 3.8279970215934473, "grad_norm": 1.4561798240094679, "kl": 0.27880859375, "learning_rate": 6.890999290858212e-07, "loss": -0.003032948588952422, "memory(GiB)": 112.53, "reward": 1.510416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.510416679084301, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2569, "train_speed(iter/s)": 0.028277 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 231.75, "completions/mean_length": 95.53125190734863, "completions/min_length": 32.5, "epoch": 3.8294862248696946, "grad_norm": 1.3401199840629963, "kl": 0.3037109375, "learning_rate": 6.888809793339728e-07, "loss": -0.017296114936470985, "memory(GiB)": 112.53, "reward": 1.729166716337204, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.7291666865348816, "rewards/CineAccuracyORM/std": 0.4154365845024586, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2570, "train_speed(iter/s)": 0.028274 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 262.75, "completions/mean_length": 97.16666984558105, "completions/min_length": 40.75, "epoch": 3.830975428145942, "grad_norm": 0.00417913159056416, "kl": 0.3212890625, "learning_rate": 6.886619873259457e-07, "loss": 0.0003210733411833644, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2571, "train_speed(iter/s)": 0.028276 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 379.0, "completions/mean_length": 104.03125, "completions/min_length": 32.25, "epoch": 3.832464631422189, "grad_norm": 1.6732035235158178, "kl": 0.3330078125, "learning_rate": 6.884429531107325e-07, "loss": -0.02507905289530754, "memory(GiB)": 112.53, "reward": 1.447916716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.447916679084301, "rewards/CineAccuracyORM/std": 0.4456155598163605, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2572, "train_speed(iter/s)": 0.028269 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 260.0, "completions/mean_length": 102.91666984558105, "completions/min_length": 41.75, "epoch": 3.8339538346984363, "grad_norm": 0.00369541749326185, "kl": 0.3037109375, "learning_rate": 6.882238767373351e-07, "loss": 0.0003033301327377558, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2573, "train_speed(iter/s)": 0.028269 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 246.0, "completions/mean_length": 99.00000381469727, "completions/min_length": 29.25, "epoch": 3.8354430379746836, "grad_norm": 1.6034616243343514, "kl": 0.32421875, "learning_rate": 6.880047582547651e-07, "loss": -0.008992173708975315, "memory(GiB)": 112.53, "reward": 1.8125000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000074505806, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2574, "train_speed(iter/s)": 0.028262 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 259.0, "completions/mean_length": 98.86458587646484, "completions/min_length": 31.25, "epoch": 3.836932241250931, "grad_norm": 1.721382901846699, "kl": 0.2998046875, "learning_rate": 6.877855977120434e-07, "loss": -0.006188839208334684, "memory(GiB)": 112.53, "reward": 1.7916667461395264, "reward_std": 0.07715167850255966, "rewards/CineAccuracyORM/mean": 0.7916666865348816, "rewards/CineAccuracyORM/std": 0.381936639547348, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2575, "train_speed(iter/s)": 0.028262 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 202.75, "completions/mean_length": 87.48958587646484, "completions/min_length": 36.0, "epoch": 3.838421444527178, "grad_norm": 0.0036258043195153133, "kl": 0.3330078125, "learning_rate": 6.875663951581999e-07, "loss": 0.0003329052124172449, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2576, "train_speed(iter/s)": 0.028265 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 276.0, "completions/mean_length": 102.46875, "completions/min_length": 32.75, "epoch": 3.8399106478034253, "grad_norm": 0.9898892000635712, "kl": 0.29638671875, "learning_rate": 6.873471506422747e-07, "loss": 0.011452024802565575, "memory(GiB)": 112.53, "reward": 1.84375, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.84375, "rewards/CineAccuracyORM/std": 0.12363383919000626, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2577, "train_speed(iter/s)": 0.028262 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 343.0, "completions/mean_length": 98.00000381469727, "completions/min_length": 21.25, "epoch": 3.8413998510796725, "grad_norm": 0.0043948976364445985, "kl": 0.34619140625, "learning_rate": 6.871278642133167e-07, "loss": 0.0003461789747234434, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2578, "train_speed(iter/s)": 0.028257 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 256.5, "completions/mean_length": 97.51041984558105, "completions/min_length": 33.75, "epoch": 3.8428890543559193, "grad_norm": 0.006400153914131859, "kl": 0.32177734375, "learning_rate": 6.869085359203843e-07, "loss": 0.00032182730501517653, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2579, "train_speed(iter/s)": 0.028251 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 272.25, "completions/mean_length": 100.95833587646484, "completions/min_length": 35.5, "epoch": 3.844378257632167, "grad_norm": 0.0040018609532288555, "kl": 0.298828125, "learning_rate": 6.866891658125456e-07, "loss": 0.00029913519392721355, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2580, "train_speed(iter/s)": 0.028252 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 311.0, "completions/mean_length": 104.55208587646484, "completions/min_length": 36.0, "epoch": 3.845867460908414, "grad_norm": 0.6355189452785207, "kl": 0.32470703125, "learning_rate": 6.864697539388772e-07, "loss": -0.003767710644751787, "memory(GiB)": 112.53, "reward": 1.5312500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.5312500149011612, "rewards/CineAccuracyORM/std": 0.4884019047021866, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2581, "train_speed(iter/s)": 0.028254 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 295.5, "completions/mean_length": 94.72916793823242, "completions/min_length": 32.0, "epoch": 3.847356664184661, "grad_norm": 0.9233518015789923, "kl": 0.31982421875, "learning_rate": 6.862503003484661e-07, "loss": 0.016098862513899803, "memory(GiB)": 112.53, "reward": 1.5625000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.5625000149011612, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2582, "train_speed(iter/s)": 0.028252 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 268.75, "completions/mean_length": 100.28125190734863, "completions/min_length": 28.0, "epoch": 3.8488458674609083, "grad_norm": 1.4603947360040777, "kl": 0.3125, "learning_rate": 6.86030805090408e-07, "loss": -0.02371296100318432, "memory(GiB)": 112.53, "reward": 1.5937500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500223517418, "rewards/CineAccuracyORM/std": 0.47723397612571716, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2583, "train_speed(iter/s)": 0.028255 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 275.0, "completions/mean_length": 102.56250381469727, "completions/min_length": 44.0, "epoch": 3.8503350707371555, "grad_norm": 0.003820541235240827, "kl": 0.30712890625, "learning_rate": 6.85811268213808e-07, "loss": 0.00030660867923870683, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2584, "train_speed(iter/s)": 0.028255 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 283.0, "completions/mean_length": 100.87500190734863, "completions/min_length": 33.5, "epoch": 3.8518242740134028, "grad_norm": 0.004816355653425985, "kl": 0.29541015625, "learning_rate": 6.855916897677806e-07, "loss": 0.00029629492200911045, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2585, "train_speed(iter/s)": 0.028257 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.25, "completions/mean_length": 95.09375190734863, "completions/min_length": 28.0, "epoch": 3.85331347728965, "grad_norm": 0.9141121789644364, "kl": 0.35107421875, "learning_rate": 6.853720698014496e-07, "loss": -0.009186421521008015, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2586, "train_speed(iter/s)": 0.028254 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 292.5, "completions/mean_length": 102.94791984558105, "completions/min_length": 41.25, "epoch": 3.8548026805658973, "grad_norm": 0.003390597068464813, "kl": 0.2998046875, "learning_rate": 6.851524083639479e-07, "loss": 0.00029987323796376586, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2587, "train_speed(iter/s)": 0.028251 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.0, "completions/mean_length": 92.26041984558105, "completions/min_length": 26.75, "epoch": 3.8562918838421445, "grad_norm": 0.006224839596906208, "kl": 0.314453125, "learning_rate": 6.849327055044181e-07, "loss": 0.0003141805063933134, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2588, "train_speed(iter/s)": 0.028254 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 256.5, "completions/mean_length": 99.27083587646484, "completions/min_length": 26.5, "epoch": 3.8577810871183917, "grad_norm": 1.489619611336165, "kl": 0.33251953125, "learning_rate": 6.847129612720117e-07, "loss": 0.01981416903436184, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2589, "train_speed(iter/s)": 0.028252 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.5, "completions/mean_length": 92.57291793823242, "completions/min_length": 41.5, "epoch": 3.859270290394639, "grad_norm": 0.003178506503846868, "kl": 0.306640625, "learning_rate": 6.844931757158896e-07, "loss": 0.0003061908937525004, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2590, "train_speed(iter/s)": 0.028251 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 343.75, "completions/mean_length": 110.02083396911621, "completions/min_length": 42.5, "epoch": 3.8607594936708862, "grad_norm": 0.0039427899856889406, "kl": 0.31396484375, "learning_rate": 6.842733488852217e-07, "loss": 0.0003135387087240815, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2591, "train_speed(iter/s)": 0.028247 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 234.75, "completions/mean_length": 90.85416984558105, "completions/min_length": 26.75, "epoch": 3.862248696947133, "grad_norm": 0.0035795055271258367, "kl": 0.3203125, "learning_rate": 6.840534808291876e-07, "loss": 0.00031993017182685435, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2592, "train_speed(iter/s)": 0.028249 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.25, "completions/mean_length": 88.77083587646484, "completions/min_length": 30.5, "epoch": 3.8637379002233807, "grad_norm": 0.0038465615275080304, "kl": 0.34912109375, "learning_rate": 6.838335715969756e-07, "loss": 0.0003488370857667178, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2593, "train_speed(iter/s)": 0.028247 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.5, "completions/mean_length": 98.07291793823242, "completions/min_length": 31.75, "epoch": 3.8652271034996275, "grad_norm": 0.0032547871621484204, "kl": 0.32080078125, "learning_rate": 6.836136212377837e-07, "loss": 0.0003205966204404831, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2594, "train_speed(iter/s)": 0.028246 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 204.75, "completions/mean_length": 87.12500190734863, "completions/min_length": 32.25, "epoch": 3.8667163067758747, "grad_norm": 0.0033456269419109047, "kl": 0.34033203125, "learning_rate": 6.833936298008188e-07, "loss": 0.0003404194430913776, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2595, "train_speed(iter/s)": 0.028247 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.75, "completions/mean_length": 83.65625190734863, "completions/min_length": 31.0, "epoch": 3.868205510052122, "grad_norm": 0.004570778162682173, "kl": 0.3544921875, "learning_rate": 6.831735973352973e-07, "loss": 0.00035371509147807956, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2596, "train_speed(iter/s)": 0.028247 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 253.75, "completions/mean_length": 99.41666984558105, "completions/min_length": 28.75, "epoch": 3.8696947133283692, "grad_norm": 1.338140020251311, "kl": 0.33642578125, "learning_rate": 6.82953523890444e-07, "loss": 0.0038250782527029514, "memory(GiB)": 112.53, "reward": 1.7916666865348816, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.24807433784008026, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2597, "train_speed(iter/s)": 0.028244 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.0, "completions/mean_length": 88.89583587646484, "completions/min_length": 33.5, "epoch": 3.8711839166046165, "grad_norm": 0.0031910015493407055, "kl": 0.32861328125, "learning_rate": 6.827334095154938e-07, "loss": 0.00032788229873403907, "memory(GiB)": 112.53, "reward": 1.3333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.3333333432674408, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2598, "train_speed(iter/s)": 0.028242 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 289.25, "completions/mean_length": 93.97916984558105, "completions/min_length": 28.5, "epoch": 3.8726731198808637, "grad_norm": 0.0042393084519178875, "kl": 0.33056640625, "learning_rate": 6.825132542596903e-07, "loss": 0.0003302696568425745, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2599, "train_speed(iter/s)": 0.028235 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 329.5, "completions/mean_length": 101.22916793823242, "completions/min_length": 39.0, "epoch": 3.874162323157111, "grad_norm": 0.0030081801404934403, "kl": 0.328125, "learning_rate": 6.822930581722863e-07, "loss": 0.0003277260984759778, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2600, "train_speed(iter/s)": 0.028231 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 264.75, "completions/mean_length": 92.22916984558105, "completions/min_length": 31.5, "epoch": 3.875651526433358, "grad_norm": 0.0032650990081349467, "kl": 0.33251953125, "learning_rate": 6.820728213025437e-07, "loss": 0.00033236731542274356, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2601, "train_speed(iter/s)": 0.028228 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 243.5, "completions/mean_length": 95.59375190734863, "completions/min_length": 26.0, "epoch": 3.8771407297096054, "grad_norm": 1.7516449154810798, "kl": 0.353515625, "learning_rate": 6.818525436997336e-07, "loss": 0.00015693649766035378, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2602, "train_speed(iter/s)": 0.028221 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.75, "completions/mean_length": 86.42708396911621, "completions/min_length": 27.25, "epoch": 3.8786299329858527, "grad_norm": 0.0035496531729118084, "kl": 0.359375, "learning_rate": 6.816322254131364e-07, "loss": 0.0003593768924474716, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2603, "train_speed(iter/s)": 0.028223 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 262.0, "completions/mean_length": 97.57291793823242, "completions/min_length": 30.5, "epoch": 3.8801191362621, "grad_norm": 0.0035633086039749706, "kl": 0.314453125, "learning_rate": 6.814118664920409e-07, "loss": 0.00031509617110714316, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2604, "train_speed(iter/s)": 0.02822 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.25, "completions/mean_length": 82.67708587646484, "completions/min_length": 15.75, "epoch": 3.8816083395383467, "grad_norm": 1.094848191410877, "kl": 0.36376953125, "learning_rate": 6.811914669857461e-07, "loss": 0.0014746810775250196, "memory(GiB)": 112.53, "reward": 1.9895833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9895833432674408, "rewards/CineAccuracyORM/std": 0.05103103443980217, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2605, "train_speed(iter/s)": 0.028215 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 232.25, "completions/mean_length": 88.39583396911621, "completions/min_length": 23.5, "epoch": 3.8830975428145944, "grad_norm": 0.004236911233971795, "kl": 0.32421875, "learning_rate": 6.809710269435589e-07, "loss": 0.0003248587599955499, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2606, "train_speed(iter/s)": 0.028215 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 289.25, "completions/mean_length": 89.62500190734863, "completions/min_length": 26.25, "epoch": 3.884586746090841, "grad_norm": 0.0034946177787034145, "kl": 0.32373046875, "learning_rate": 6.807505464147962e-07, "loss": 0.00032328636734746397, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2607, "train_speed(iter/s)": 0.028218 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.25, "completions/mean_length": 86.83333587646484, "completions/min_length": 40.0, "epoch": 3.8860759493670884, "grad_norm": 2.057767267943756, "kl": 0.32958984375, "learning_rate": 6.805300254487833e-07, "loss": 0.018260207027196884, "memory(GiB)": 112.53, "reward": 1.96875, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.96875, "rewards/CineAccuracyORM/std": 0.08445799350738525, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2608, "train_speed(iter/s)": 0.028221 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 261.25, "completions/mean_length": 86.71875381469727, "completions/min_length": 30.0, "epoch": 3.8875651526433357, "grad_norm": 0.0036098419197025822, "kl": 0.353515625, "learning_rate": 6.803094640948552e-07, "loss": 0.0003537113661877811, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2609, "train_speed(iter/s)": 0.028215 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.25, "completions/mean_length": 96.06250190734863, "completions/min_length": 30.0, "epoch": 3.889054355919583, "grad_norm": 0.003353252006342593, "kl": 0.30615234375, "learning_rate": 6.800888624023552e-07, "loss": 0.0003064627235289663, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2610, "train_speed(iter/s)": 0.028215 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 310.25, "completions/mean_length": 108.82291793823242, "completions/min_length": 27.75, "epoch": 3.89054355919583, "grad_norm": 0.00373920839496221, "kl": 0.30615234375, "learning_rate": 6.798682204206361e-07, "loss": 0.0003067046054638922, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2611, "train_speed(iter/s)": 0.02821 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 251.75, "completions/mean_length": 86.44791793823242, "completions/min_length": 30.5, "epoch": 3.8920327624720774, "grad_norm": 0.0045834306119595805, "kl": 0.361328125, "learning_rate": 6.796475381990597e-07, "loss": 0.00036054171505384147, "memory(GiB)": 112.53, "reward": 1.5833333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2612, "train_speed(iter/s)": 0.028213 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 267.0, "completions/mean_length": 101.11458587646484, "completions/min_length": 36.75, "epoch": 3.8935219657483247, "grad_norm": 1.8723402190874365, "kl": 0.30615234375, "learning_rate": 6.794268157869967e-07, "loss": 0.012533615343272686, "memory(GiB)": 112.53, "reward": 1.4687500596046448, "reward_std": 0.07400631904602051, "rewards/CineAccuracyORM/mean": 0.4687500149011612, "rewards/CineAccuracyORM/std": 0.464870348572731, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2613, "train_speed(iter/s)": 0.028214 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 256.0, "completions/mean_length": 94.53125190734863, "completions/min_length": 39.0, "epoch": 3.895011169024572, "grad_norm": 0.5606409674289792, "kl": 0.3369140625, "learning_rate": 6.792060532338267e-07, "loss": 0.025118418037891388, "memory(GiB)": 112.53, "reward": 1.572916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5729166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2614, "train_speed(iter/s)": 0.028217 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 304.25, "completions/mean_length": 110.01041984558105, "completions/min_length": 28.0, "epoch": 3.896500372300819, "grad_norm": 0.004331796559275703, "kl": 0.30419921875, "learning_rate": 6.789852505889382e-07, "loss": 0.000304480257909745, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2615, "train_speed(iter/s)": 0.02821 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.25, "completions/mean_length": 82.48958778381348, "completions/min_length": 34.5, "epoch": 3.8979895755770664, "grad_norm": 1.3699802027685826, "kl": 0.35400390625, "learning_rate": 6.787644079017293e-07, "loss": 0.013038384728133678, "memory(GiB)": 112.53, "reward": 1.9895833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9895833432674408, "rewards/CineAccuracyORM/std": 0.05103103443980217, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2616, "train_speed(iter/s)": 0.028211 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 379.25, "completions/mean_length": 101.32291793823242, "completions/min_length": 27.0, "epoch": 3.8994787788533136, "grad_norm": 0.00340761833542864, "kl": 0.33251953125, "learning_rate": 6.785435252216064e-07, "loss": 0.00033251228160224855, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2617, "train_speed(iter/s)": 0.028207 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 235.5, "completions/mean_length": 90.43750190734863, "completions/min_length": 21.5, "epoch": 3.9009679821295604, "grad_norm": 0.6890520901707721, "kl": 0.31640625, "learning_rate": 6.783226025979849e-07, "loss": 0.013737671077251434, "memory(GiB)": 112.53, "reward": 1.5937500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500074505806, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2618, "train_speed(iter/s)": 0.028207 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.75, "completions/mean_length": 89.71875381469727, "completions/min_length": 27.25, "epoch": 3.902457185405808, "grad_norm": 0.003337118070917114, "kl": 0.34375, "learning_rate": 6.781016400802895e-07, "loss": 0.00034377601696178317, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2619, "train_speed(iter/s)": 0.0282 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 233.0, "completions/mean_length": 88.34375, "completions/min_length": 27.5, "epoch": 3.903946388682055, "grad_norm": 0.7998544804915385, "kl": 0.3583984375, "learning_rate": 6.778806377179535e-07, "loss": 0.013229629024863243, "memory(GiB)": 112.53, "reward": 1.6562500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500149011612, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2620, "train_speed(iter/s)": 0.028201 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 270.75, "completions/mean_length": 91.22916984558105, "completions/min_length": 25.0, "epoch": 3.905435591958302, "grad_norm": 0.004259356669037016, "kl": 0.31884765625, "learning_rate": 6.776595955604192e-07, "loss": 0.0003184736124239862, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2621, "train_speed(iter/s)": 0.0282 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 265.0, "completions/mean_length": 87.83333587646484, "completions/min_length": 27.75, "epoch": 3.9069247952345494, "grad_norm": 0.004533610891403084, "kl": 0.34326171875, "learning_rate": 6.774385136571379e-07, "loss": 0.0003434894315432757, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2622, "train_speed(iter/s)": 0.028197 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 313.25, "completions/mean_length": 96.89583587646484, "completions/min_length": 32.75, "epoch": 3.9084139985107966, "grad_norm": 0.8638691847294794, "kl": 0.35302734375, "learning_rate": 6.772173920575699e-07, "loss": -0.005660985596477985, "memory(GiB)": 112.53, "reward": 1.8125000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2623, "train_speed(iter/s)": 0.028193 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.25, "completions/mean_length": 80.13541793823242, "completions/min_length": 15.75, "epoch": 3.909903201787044, "grad_norm": 0.003940107569662874, "kl": 0.3759765625, "learning_rate": 6.769962308111839e-07, "loss": 0.0003754503559321165, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2624, "train_speed(iter/s)": 0.028196 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 256.75, "completions/mean_length": 90.62500190734863, "completions/min_length": 35.5, "epoch": 3.911392405063291, "grad_norm": 0.004245678645251296, "kl": 0.35498046875, "learning_rate": 6.767750299674578e-07, "loss": 0.00035436227335594594, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2625, "train_speed(iter/s)": 0.028192 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.75, "completions/mean_length": 81.98958396911621, "completions/min_length": 29.25, "epoch": 3.9128816083395384, "grad_norm": 0.004602468583527028, "kl": 0.40576171875, "learning_rate": 6.765537895758784e-07, "loss": 0.0004053188022226095, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2626, "train_speed(iter/s)": 0.02819 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.5, "completions/mean_length": 75.16666793823242, "completions/min_length": 28.75, "epoch": 3.9143708116157856, "grad_norm": 1.2172140498194841, "kl": 0.41943359375, "learning_rate": 6.763325096859413e-07, "loss": -0.015785543248057365, "memory(GiB)": 112.53, "reward": 1.7291666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.729166679084301, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2627, "train_speed(iter/s)": 0.028186 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 278.5, "completions/mean_length": 90.40625381469727, "completions/min_length": 34.5, "epoch": 3.915860014892033, "grad_norm": 0.9825629186077202, "kl": 0.36767578125, "learning_rate": 6.761111903471511e-07, "loss": 0.03233455866575241, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 0.9895833432674408, "rewards/Format/std": 0.05103103443980217, "step": 2628, "train_speed(iter/s)": 0.028188 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 206.5, "completions/mean_length": 76.45833587646484, "completions/min_length": 22.25, "epoch": 3.91734921816828, "grad_norm": 0.0043005881493044604, "kl": 0.4267578125, "learning_rate": 6.758898316090207e-07, "loss": 0.000427015358582139, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2629, "train_speed(iter/s)": 0.028182 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.75, "completions/mean_length": 71.40625190734863, "completions/min_length": 19.5, "epoch": 3.9188384214445273, "grad_norm": 0.00409642367616782, "kl": 0.40966796875, "learning_rate": 6.756684335210723e-07, "loss": 0.00040966467349790037, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2630, "train_speed(iter/s)": 0.028186 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 282.5, "completions/mean_length": 78.50000381469727, "completions/min_length": 22.25, "epoch": 3.920327624720774, "grad_norm": 0.9029477423290817, "kl": 0.39697265625, "learning_rate": 6.754469961328368e-07, "loss": -0.0006018605781719089, "memory(GiB)": 112.53, "reward": 1.697916716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.697916679084301, "rewards/CineAccuracyORM/std": 0.34448449313640594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2631, "train_speed(iter/s)": 0.028185 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 282.75, "completions/mean_length": 83.15625381469727, "completions/min_length": 27.25, "epoch": 3.921816827997022, "grad_norm": 0.007006344426836589, "kl": 0.42041015625, "learning_rate": 6.752255194938539e-07, "loss": 0.00042039394611492753, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2632, "train_speed(iter/s)": 0.028184 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 273.25, "completions/mean_length": 74.32291984558105, "completions/min_length": 29.5, "epoch": 3.9233060312732686, "grad_norm": 0.004448540928160086, "kl": 0.435546875, "learning_rate": 6.750040036536717e-07, "loss": 0.0004361444734968245, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2633, "train_speed(iter/s)": 0.02818 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.0, "completions/mean_length": 69.36458587646484, "completions/min_length": 28.75, "epoch": 3.924795234549516, "grad_norm": 2.1475552532081337, "kl": 0.47216796875, "learning_rate": 6.74782448661848e-07, "loss": -0.008352096192538738, "memory(GiB)": 112.53, "reward": 1.479166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.479166679084301, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2634, "train_speed(iter/s)": 0.028181 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.75, "completions/mean_length": 74.65625095367432, "completions/min_length": 26.0, "epoch": 3.926284437825763, "grad_norm": 0.004400420595572477, "kl": 0.455078125, "learning_rate": 6.745608545679481e-07, "loss": 0.0004554629558697343, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2635, "train_speed(iter/s)": 0.028181 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 231.25, "completions/mean_length": 78.15625381469727, "completions/min_length": 26.75, "epoch": 3.9277736411020103, "grad_norm": 0.9011747688418134, "kl": 0.4375, "learning_rate": 6.743392214215472e-07, "loss": -0.0005861054523847997, "memory(GiB)": 112.53, "reward": 1.7604166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.760416679084301, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2636, "train_speed(iter/s)": 0.028181 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.5, "completions/mean_length": 78.96875381469727, "completions/min_length": 31.75, "epoch": 3.9292628443782576, "grad_norm": 0.004174430344389283, "kl": 0.4365234375, "learning_rate": 6.741175492722286e-07, "loss": 0.0004361949977464974, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2637, "train_speed(iter/s)": 0.028181 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.0, "completions/mean_length": 75.58333492279053, "completions/min_length": 30.0, "epoch": 3.930752047654505, "grad_norm": 0.007369046433695276, "kl": 0.435546875, "learning_rate": 6.738958381695845e-07, "loss": 0.00043552927672863007, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2638, "train_speed(iter/s)": 0.028185 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.25, "completions/mean_length": 76.62500381469727, "completions/min_length": 32.0, "epoch": 3.932241250930752, "grad_norm": 1.037116578936261, "kl": 0.42919921875, "learning_rate": 6.736740881632154e-07, "loss": -0.0017707364168018103, "memory(GiB)": 112.53, "reward": 1.6875000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000149011612, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2639, "train_speed(iter/s)": 0.028182 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 362.0, "completions/mean_length": 81.09375381469727, "completions/min_length": 25.5, "epoch": 3.9337304542069993, "grad_norm": 0.004052709197706314, "kl": 0.4287109375, "learning_rate": 6.734522993027315e-07, "loss": 0.00042933470103889704, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2640, "train_speed(iter/s)": 0.028181 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 195.75, "completions/mean_length": 79.30208587646484, "completions/min_length": 35.0, "epoch": 3.9352196574832465, "grad_norm": 1.5795447118776544, "kl": 0.44140625, "learning_rate": 6.732304716377509e-07, "loss": 0.013826489448547363, "memory(GiB)": 112.53, "reward": 1.729166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.729166679084301, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2641, "train_speed(iter/s)": 0.028176 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.0, "completions/mean_length": 75.33333587646484, "completions/min_length": 22.75, "epoch": 3.9367088607594938, "grad_norm": 1.0127794658700588, "kl": 0.4423828125, "learning_rate": 6.730086052179002e-07, "loss": -0.0052105821669101715, "memory(GiB)": 112.53, "reward": 1.6562500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500149011612, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2642, "train_speed(iter/s)": 0.028176 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.0, "completions/mean_length": 76.54166984558105, "completions/min_length": 34.0, "epoch": 3.938198064035741, "grad_norm": 1.0161427726459042, "kl": 0.43505859375, "learning_rate": 6.727867000928155e-07, "loss": -0.011783663183450699, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2643, "train_speed(iter/s)": 0.028174 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.5, "completions/mean_length": 73.07291793823242, "completions/min_length": 26.5, "epoch": 3.939687267311988, "grad_norm": 0.004641976082739697, "kl": 0.4716796875, "learning_rate": 6.725647563121408e-07, "loss": 0.00047205653390847147, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2644, "train_speed(iter/s)": 0.028172 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.0, "completions/mean_length": 71.69791984558105, "completions/min_length": 30.5, "epoch": 3.9411764705882355, "grad_norm": 0.007927238435383307, "kl": 0.484375, "learning_rate": 6.72342773925529e-07, "loss": 0.00048399687511846423, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2645, "train_speed(iter/s)": 0.028173 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.5, "completions/mean_length": 72.76041984558105, "completions/min_length": 34.5, "epoch": 3.9426656738644823, "grad_norm": 0.005253233586426944, "kl": 0.43994140625, "learning_rate": 6.721207529826421e-07, "loss": 0.000439491675933823, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2646, "train_speed(iter/s)": 0.028177 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.25, "completions/mean_length": 72.84375190734863, "completions/min_length": 34.0, "epoch": 3.94415487714073, "grad_norm": 0.0043988209923666284, "kl": 0.4462890625, "learning_rate": 6.718986935331497e-07, "loss": 0.00044706446351483464, "memory(GiB)": 112.53, "reward": 1.5833333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2647, "train_speed(iter/s)": 0.028172 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 127.25, "completions/mean_length": 68.12500190734863, "completions/min_length": 24.5, "epoch": 3.945644080416977, "grad_norm": 0.004262142544914288, "kl": 0.4619140625, "learning_rate": 6.716765956267312e-07, "loss": 0.00046209199354052544, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2648, "train_speed(iter/s)": 0.028174 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.25, "completions/mean_length": 73.23958587646484, "completions/min_length": 28.0, "epoch": 3.947133283693224, "grad_norm": 0.0055350065323433555, "kl": 0.4375, "learning_rate": 6.714544593130738e-07, "loss": 0.0004378321464173496, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2649, "train_speed(iter/s)": 0.028174 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.75, "completions/mean_length": 67.64583587646484, "completions/min_length": 31.0, "epoch": 3.9486224869694713, "grad_norm": 0.004887983122047707, "kl": 0.4775390625, "learning_rate": 6.712322846418732e-07, "loss": 0.0004773953405674547, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2650, "train_speed(iter/s)": 0.028175 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.25, "completions/mean_length": 68.93750190734863, "completions/min_length": 35.25, "epoch": 3.9501116902457185, "grad_norm": 0.8406986128537197, "kl": 0.47998046875, "learning_rate": 6.710100716628344e-07, "loss": -0.018567640334367752, "memory(GiB)": 112.53, "reward": 1.8125000298023224, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.822916679084301, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 0.9895833432674408, "rewards/Format/std": 0.05103103443980217, "step": 2651, "train_speed(iter/s)": 0.028179 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.25, "completions/mean_length": 80.52083587646484, "completions/min_length": 35.25, "epoch": 3.9516008935219658, "grad_norm": 2.3057265334232526, "kl": 0.41748046875, "learning_rate": 6.707878204256702e-07, "loss": -0.013380005024373531, "memory(GiB)": 112.53, "reward": 1.8854166865348816, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.8854166865348816, "rewards/CineAccuracyORM/std": 0.17693356797099113, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2652, "train_speed(iter/s)": 0.028183 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.0, "completions/mean_length": 72.47916889190674, "completions/min_length": 36.25, "epoch": 3.953090096798213, "grad_norm": 0.004753661156527075, "kl": 0.47021484375, "learning_rate": 6.70565530980103e-07, "loss": 0.0004706430481746793, "memory(GiB)": 112.53, "reward": 1.416666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.416666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2653, "train_speed(iter/s)": 0.028181 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.0, "completions/mean_length": 78.07291793823242, "completions/min_length": 31.0, "epoch": 3.9545793000744602, "grad_norm": 0.004408463787695358, "kl": 0.4521484375, "learning_rate": 6.703432033758623e-07, "loss": 0.0004522586241364479, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2654, "train_speed(iter/s)": 0.028178 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.0, "completions/mean_length": 79.79166793823242, "completions/min_length": 43.75, "epoch": 3.9560685033507075, "grad_norm": 1.3315931682523297, "kl": 0.40576171875, "learning_rate": 6.701208376626873e-07, "loss": -0.000560117419809103, "memory(GiB)": 112.53, "reward": 1.7604166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.760416679084301, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2655, "train_speed(iter/s)": 0.028182 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 316.75, "completions/mean_length": 90.10416984558105, "completions/min_length": 34.25, "epoch": 3.9575577066269547, "grad_norm": 0.7413456612130582, "kl": 0.39892578125, "learning_rate": 6.698984338903253e-07, "loss": -6.059937732061371e-05, "memory(GiB)": 112.53, "reward": 1.6875000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000074505806, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2656, "train_speed(iter/s)": 0.028181 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.5, "completions/mean_length": 74.10416984558105, "completions/min_length": 28.25, "epoch": 3.9590469099032015, "grad_norm": 3.201351114886679, "kl": 0.4453125, "learning_rate": 6.696759921085321e-07, "loss": 0.010449017398059368, "memory(GiB)": 112.53, "reward": 1.822916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2657, "train_speed(iter/s)": 0.028182 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.0, "completions/mean_length": 72.68750381469727, "completions/min_length": 30.0, "epoch": 3.960536113179449, "grad_norm": 1.3404317824187024, "kl": 0.4189453125, "learning_rate": 6.69453512367072e-07, "loss": -0.0031154004391282797, "memory(GiB)": 112.53, "reward": 1.7604166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2658, "train_speed(iter/s)": 0.028186 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.75, "completions/mean_length": 73.87500190734863, "completions/min_length": 29.25, "epoch": 3.962025316455696, "grad_norm": 0.005208318741058152, "kl": 0.43994140625, "learning_rate": 6.692309947157179e-07, "loss": 0.0004397924931254238, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2659, "train_speed(iter/s)": 0.028183 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 195.5, "completions/mean_length": 77.32291793823242, "completions/min_length": 21.25, "epoch": 3.9635145197319437, "grad_norm": 0.004310185683353303, "kl": 0.43701171875, "learning_rate": 6.690084392042513e-07, "loss": 0.00043724532588385046, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2660, "train_speed(iter/s)": 0.028178 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.25, "completions/mean_length": 78.44792175292969, "completions/min_length": 35.75, "epoch": 3.9650037230081905, "grad_norm": 0.0045331289800860505, "kl": 0.3935546875, "learning_rate": 6.687858458824618e-07, "loss": 0.000393339287256822, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2661, "train_speed(iter/s)": 0.028181 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 256.0, "completions/mean_length": 88.22916793823242, "completions/min_length": 35.5, "epoch": 3.9664929262844377, "grad_norm": 0.004214268839446092, "kl": 0.40673828125, "learning_rate": 6.685632148001477e-07, "loss": 0.0004065638640895486, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2662, "train_speed(iter/s)": 0.028181 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 272.0, "completions/mean_length": 91.47916984558105, "completions/min_length": 28.5, "epoch": 3.967982129560685, "grad_norm": 0.004544194912796951, "kl": 0.369140625, "learning_rate": 6.683405460071156e-07, "loss": 0.00036825420102104545, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2663, "train_speed(iter/s)": 0.028177 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.0, "completions/mean_length": 86.10416793823242, "completions/min_length": 39.75, "epoch": 3.969471332836932, "grad_norm": 0.0037419294517905444, "kl": 0.359375, "learning_rate": 6.681178395531808e-07, "loss": 0.0003594297158997506, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2664, "train_speed(iter/s)": 0.028177 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 191.75, "completions/mean_length": 77.91666984558105, "completions/min_length": 34.25, "epoch": 3.9709605361131795, "grad_norm": 0.004894857899570029, "kl": 0.42041015625, "learning_rate": 6.678950954881668e-07, "loss": 0.0004202997952234, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2665, "train_speed(iter/s)": 0.028175 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.0, "completions/mean_length": 87.43750190734863, "completions/min_length": 39.0, "epoch": 3.9724497393894267, "grad_norm": 2.8202589262105007, "kl": 0.4072265625, "learning_rate": 6.676723138619056e-07, "loss": 0.016989633440971375, "memory(GiB)": 112.53, "reward": 1.7187500596046448, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.7187500149011612, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2666, "train_speed(iter/s)": 0.028172 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.0, "completions/mean_length": 92.90625190734863, "completions/min_length": 41.5, "epoch": 3.973938942665674, "grad_norm": 0.6207210173545809, "kl": 0.365234375, "learning_rate": 6.674494947242376e-07, "loss": -0.010460512712597847, "memory(GiB)": 112.53, "reward": 1.6458333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6458333507180214, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2667, "train_speed(iter/s)": 0.028167 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 238.0, "completions/mean_length": 87.06250190734863, "completions/min_length": 40.5, "epoch": 3.975428145941921, "grad_norm": 3.684731419829576, "kl": 0.35986328125, "learning_rate": 6.672266381250114e-07, "loss": -0.03791811689734459, "memory(GiB)": 112.53, "reward": 1.5312500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.5312500149011612, "rewards/CineAccuracyORM/std": 0.4884019047021866, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2668, "train_speed(iter/s)": 0.028164 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 253.25, "completions/mean_length": 84.87500190734863, "completions/min_length": 34.25, "epoch": 3.9769173492181684, "grad_norm": 0.0037258783243389808, "kl": 0.35986328125, "learning_rate": 6.670037441140843e-07, "loss": 0.00035939630470238626, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2669, "train_speed(iter/s)": 0.028166 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.75, "completions/mean_length": 87.89583587646484, "completions/min_length": 30.25, "epoch": 3.978406552494415, "grad_norm": 0.0043781286759073754, "kl": 0.36865234375, "learning_rate": 6.667808127413219e-07, "loss": 0.00036822096444666386, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2670, "train_speed(iter/s)": 0.028167 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 244.75, "completions/mean_length": 81.95833587646484, "completions/min_length": 33.75, "epoch": 3.979895755770663, "grad_norm": 0.003919375827805934, "kl": 0.36865234375, "learning_rate": 6.665578440565979e-07, "loss": 0.00036865699803456664, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2671, "train_speed(iter/s)": 0.028164 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.5, "completions/mean_length": 82.60416984558105, "completions/min_length": 36.5, "epoch": 3.9813849590469097, "grad_norm": 0.004011958101685968, "kl": 0.39453125, "learning_rate": 6.663348381097948e-07, "loss": 0.0003947817604057491, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2672, "train_speed(iter/s)": 0.028165 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 184.25, "completions/mean_length": 85.08333396911621, "completions/min_length": 39.25, "epoch": 3.9828741623231574, "grad_norm": 1.2576030885362623, "kl": 0.3935546875, "learning_rate": 6.661117949508028e-07, "loss": 0.0032674637623131275, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2673, "train_speed(iter/s)": 0.028166 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.5, "completions/mean_length": 82.97916984558105, "completions/min_length": 42.5, "epoch": 3.984363365599404, "grad_norm": 0.9040757543509733, "kl": 0.5751953125, "learning_rate": 6.658887146295211e-07, "loss": 0.0005757875624112785, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2674, "train_speed(iter/s)": 0.028166 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 264.0, "completions/mean_length": 96.65625190734863, "completions/min_length": 42.25, "epoch": 3.9858525688756514, "grad_norm": 0.0039221872054452005, "kl": 0.35009765625, "learning_rate": 6.656655971958567e-07, "loss": 0.00034995676833204925, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2675, "train_speed(iter/s)": 0.028166 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 297.75, "completions/mean_length": 101.33333587646484, "completions/min_length": 39.75, "epoch": 3.9873417721518987, "grad_norm": 0.003588481980764129, "kl": 0.3232421875, "learning_rate": 6.654424426997254e-07, "loss": 0.00032254180405288935, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2676, "train_speed(iter/s)": 0.028162 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.25, "completions/mean_length": 98.02083587646484, "completions/min_length": 39.25, "epoch": 3.988830975428146, "grad_norm": 0.0037280558585320645, "kl": 0.337890625, "learning_rate": 6.652192511910506e-07, "loss": 0.00033797044306993484, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2677, "train_speed(iter/s)": 0.028162 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 288.5, "completions/mean_length": 92.52083396911621, "completions/min_length": 33.0, "epoch": 3.990320178704393, "grad_norm": 1.1768748796041293, "kl": 0.357421875, "learning_rate": 6.649960227197647e-07, "loss": -0.028098901733756065, "memory(GiB)": 112.53, "reward": 1.4270833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.4270833432674408, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2678, "train_speed(iter/s)": 0.028158 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.5, "completions/mean_length": 92.31250190734863, "completions/min_length": 28.0, "epoch": 3.9918093819806404, "grad_norm": 1.9871618350829676, "kl": 0.35498046875, "learning_rate": 6.647727573358078e-07, "loss": 0.0002982043370138854, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2679, "train_speed(iter/s)": 0.028159 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.5, "completions/mean_length": 92.63541984558105, "completions/min_length": 37.0, "epoch": 3.9932985852568876, "grad_norm": 1.6520328041314298, "kl": 0.341796875, "learning_rate": 6.645494550891288e-07, "loss": 0.016851898282766342, "memory(GiB)": 112.53, "reward": 1.697916716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.697916679084301, "rewards/CineAccuracyORM/std": 0.34448449313640594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2680, "train_speed(iter/s)": 0.028162 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 301.0, "completions/mean_length": 108.40625381469727, "completions/min_length": 32.25, "epoch": 3.994787788533135, "grad_norm": 0.004776708898016825, "kl": 0.32080078125, "learning_rate": 6.643261160296844e-07, "loss": 0.0003207047702744603, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2681, "train_speed(iter/s)": 0.028159 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 284.25, "completions/mean_length": 102.94791984558105, "completions/min_length": 35.0, "epoch": 3.996276991809382, "grad_norm": 3.1659173282936117, "kl": 0.3349609375, "learning_rate": 6.641027402074397e-07, "loss": -0.002579229651018977, "memory(GiB)": 112.53, "reward": 1.7812500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7812500074505806, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2682, "train_speed(iter/s)": 0.028152 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 250.75, "completions/mean_length": 102.32291984558105, "completions/min_length": 39.5, "epoch": 3.997766195085629, "grad_norm": 0.0034246154538724823, "kl": 0.31005859375, "learning_rate": 6.638793276723677e-07, "loss": 0.00030991938547231257, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2683, "train_speed(iter/s)": 0.028152 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 279.5, "completions/mean_length": 105.36458778381348, "completions/min_length": 46.0, "epoch": 3.9992553983618766, "grad_norm": 0.7876354390970245, "kl": 0.3056640625, "learning_rate": 6.636558784744506e-07, "loss": 0.004907334689050913, "memory(GiB)": 112.53, "reward": 1.8541666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2684, "train_speed(iter/s)": 0.028152 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 293.5, "completions/mean_length": 120.32291984558105, "completions/min_length": 46.75, "epoch": 4.001489203276247, "grad_norm": 0.003220435586409748, "kl": 0.282958984375, "learning_rate": 6.634323926636776e-07, "loss": 0.00028251580079086125, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2685, "train_speed(iter/s)": 0.028143 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 493.75, "completions/mean_length": 120.66666793823242, "completions/min_length": 34.25, "epoch": 4.0029784065524945, "grad_norm": 23.79852587455537, "kl": 9.5966796875, "learning_rate": 6.632088702900467e-07, "loss": 0.06896506249904633, "memory(GiB)": 112.53, "reward": 1.4895833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5000000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 0.9895833432674408, "rewards/Format/std": 0.05103103443980217, "step": 2686, "train_speed(iter/s)": 0.028142 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 415.75, "completions/mean_length": 116.48958396911621, "completions/min_length": 26.25, "epoch": 4.004467609828741, "grad_norm": 0.0036372713719617514, "kl": 0.31005859375, "learning_rate": 6.629853114035642e-07, "loss": 0.00031092212884686887, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2687, "train_speed(iter/s)": 0.028137 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 295.75, "completions/mean_length": 116.96875381469727, "completions/min_length": 34.75, "epoch": 4.005956813104989, "grad_norm": 0.0039045142076012704, "kl": 0.30029296875, "learning_rate": 6.627617160542443e-07, "loss": 0.0003007450432050973, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2688, "train_speed(iter/s)": 0.028136 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 286.75, "completions/mean_length": 104.83333587646484, "completions/min_length": 41.25, "epoch": 4.007446016381236, "grad_norm": 0.0038306019715282935, "kl": 0.326171875, "learning_rate": 6.625380842921093e-07, "loss": 0.0003261760575696826, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2689, "train_speed(iter/s)": 0.028134 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 253.0, "completions/mean_length": 105.26042175292969, "completions/min_length": 37.0, "epoch": 4.0089352196574835, "grad_norm": 3.5080900174867455, "kl": 0.322265625, "learning_rate": 6.623144161671899e-07, "loss": -0.0008841207018122077, "memory(GiB)": 112.53, "reward": 1.5937500298023224, "reward_std": 0.0876726396381855, "rewards/CineAccuracyORM/mean": 0.5937500074505806, "rewards/CineAccuracyORM/std": 0.4529181867837906, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2690, "train_speed(iter/s)": 0.028131 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 243.25, "completions/mean_length": 103.76041984558105, "completions/min_length": 34.75, "epoch": 4.01042442293373, "grad_norm": 0.864226457357619, "kl": 0.32470703125, "learning_rate": 6.620907117295245e-07, "loss": 0.006834996398538351, "memory(GiB)": 112.53, "reward": 1.7083333730697632, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7083333507180214, "rewards/CineAccuracyORM/std": 0.33594508469104767, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2691, "train_speed(iter/s)": 0.028132 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 237.75, "completions/mean_length": 96.57291793823242, "completions/min_length": 29.75, "epoch": 4.011913626209978, "grad_norm": 0.9963772537859646, "kl": 0.33935546875, "learning_rate": 6.618669710291606e-07, "loss": -0.011166771873831749, "memory(GiB)": 112.53, "reward": 1.8125000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000074505806, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2692, "train_speed(iter/s)": 0.028128 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 272.5, "completions/mean_length": 93.89583587646484, "completions/min_length": 28.75, "epoch": 4.013402829486225, "grad_norm": 2.1998922109832546, "kl": 0.34521484375, "learning_rate": 6.616431941161524e-07, "loss": -0.003929792437702417, "memory(GiB)": 112.53, "reward": 1.697916716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.697916679084301, "rewards/CineAccuracyORM/std": 0.34448449313640594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2693, "train_speed(iter/s)": 0.028126 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 232.5, "completions/mean_length": 106.47917175292969, "completions/min_length": 41.25, "epoch": 4.014892032762472, "grad_norm": 0.003709103873183173, "kl": 0.31884765625, "learning_rate": 6.614193810405635e-07, "loss": 0.0003184048109687865, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2694, "train_speed(iter/s)": 0.028126 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 272.5, "completions/mean_length": 115.71875190734863, "completions/min_length": 40.5, "epoch": 4.016381236038719, "grad_norm": 0.0038930108429504296, "kl": 0.28125, "learning_rate": 6.611955318524648e-07, "loss": 0.00028098589973524213, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2695, "train_speed(iter/s)": 0.028129 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 395.75, "completions/mean_length": 128.7187557220459, "completions/min_length": 42.5, "epoch": 4.017870439314967, "grad_norm": 0.0042727258416605275, "kl": 0.28125, "learning_rate": 6.609716466019355e-07, "loss": 0.000281866523437202, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2696, "train_speed(iter/s)": 0.028126 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 263.25, "completions/mean_length": 106.40625381469727, "completions/min_length": 39.0, "epoch": 4.019359642591214, "grad_norm": 0.003988737116201309, "kl": 0.3037109375, "learning_rate": 6.607477253390628e-07, "loss": 0.0003042837488465011, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2697, "train_speed(iter/s)": 0.028124 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 305.0, "completions/mean_length": 100.79166984558105, "completions/min_length": 38.0, "epoch": 4.0208488458674605, "grad_norm": 0.7073151889998226, "kl": 0.35107421875, "learning_rate": 6.605237681139422e-07, "loss": -0.004949376918375492, "memory(GiB)": 112.53, "reward": 1.5104166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5104166716337204, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2698, "train_speed(iter/s)": 0.028124 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 284.0, "completions/mean_length": 113.66666984558105, "completions/min_length": 47.25, "epoch": 4.022338049143708, "grad_norm": 0.005194937278997889, "kl": 0.28857421875, "learning_rate": 6.602997749766772e-07, "loss": 0.00028891843976452947, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2699, "train_speed(iter/s)": 0.028123 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 276.0, "completions/mean_length": 111.05208396911621, "completions/min_length": 36.0, "epoch": 4.023827252419955, "grad_norm": 0.0034728299326316097, "kl": 0.30078125, "learning_rate": 6.600757459773791e-07, "loss": 0.0003014708636328578, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2700, "train_speed(iter/s)": 0.028123 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 283.25, "completions/mean_length": 112.30208396911621, "completions/min_length": 48.5, "epoch": 4.025316455696203, "grad_norm": 1.9126489810352807, "kl": 0.32177734375, "learning_rate": 6.598516811661672e-07, "loss": 0.01723247393965721, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.07715167850255966, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3568698540329933, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2701, "train_speed(iter/s)": 0.028125 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 258.25, "completions/mean_length": 104.09375190734863, "completions/min_length": 45.5, "epoch": 4.0268056589724495, "grad_norm": 1.5593523447975712, "kl": 0.32666015625, "learning_rate": 6.59627580593169e-07, "loss": -0.023274969309568405, "memory(GiB)": 112.53, "reward": 1.4583333730697632, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.4583333432674408, "rewards/CineAccuracyORM/std": 0.4681183323264122, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2702, "train_speed(iter/s)": 0.028127 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 238.25, "completions/mean_length": 110.27083587646484, "completions/min_length": 45.25, "epoch": 4.028294862248697, "grad_norm": 0.004401822425781713, "kl": 0.3095703125, "learning_rate": 6.594034443085201e-07, "loss": 0.0003095408028457314, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2703, "train_speed(iter/s)": 0.028122 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 246.5, "completions/mean_length": 111.61458587646484, "completions/min_length": 42.25, "epoch": 4.029784065524944, "grad_norm": 0.0034120087944587876, "kl": 0.30419921875, "learning_rate": 6.591792723623637e-07, "loss": 0.00030415208311751485, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2704, "train_speed(iter/s)": 0.028124 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 299.0, "completions/mean_length": 116.91666984558105, "completions/min_length": 38.25, "epoch": 4.031273268801192, "grad_norm": 1.204584552113762, "kl": 0.29833984375, "learning_rate": 6.589550648048517e-07, "loss": 0.013820528984069824, "memory(GiB)": 112.53, "reward": 1.6145834028720856, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6145833507180214, "rewards/CineAccuracyORM/std": 0.4884019047021866, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2705, "train_speed(iter/s)": 0.028121 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 261.5, "completions/mean_length": 98.32291793823242, "completions/min_length": 42.75, "epoch": 4.032762472077438, "grad_norm": 0.5777178138812391, "kl": 0.35107421875, "learning_rate": 6.587308216861429e-07, "loss": 0.0007712878286838531, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 0.9895833432674408, "rewards/Format/std": 0.05103103443980217, "step": 2706, "train_speed(iter/s)": 0.028124 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 280.5, "completions/mean_length": 118.21875190734863, "completions/min_length": 47.25, "epoch": 4.034251675353686, "grad_norm": 1.3729899163643589, "kl": 0.310546875, "learning_rate": 6.585065430564049e-07, "loss": -0.017010390758514404, "memory(GiB)": 112.53, "reward": 1.5937500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500149011612, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2707, "train_speed(iter/s)": 0.028121 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.0, "completions/mean_length": 101.75000381469727, "completions/min_length": 38.75, "epoch": 4.035740878629933, "grad_norm": 0.0036276609619701485, "kl": 0.32373046875, "learning_rate": 6.582822289658134e-07, "loss": 0.00032333884155377746, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2708, "train_speed(iter/s)": 0.028124 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 285.0, "completions/mean_length": 105.10416984558105, "completions/min_length": 47.5, "epoch": 4.037230081906181, "grad_norm": 0.003734592436767604, "kl": 0.31982421875, "learning_rate": 6.580578794645509e-07, "loss": 0.00031981090432964265, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2709, "train_speed(iter/s)": 0.028124 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 266.0, "completions/mean_length": 107.91666984558105, "completions/min_length": 34.5, "epoch": 4.038719285182427, "grad_norm": 0.003927864938957631, "kl": 0.296875, "learning_rate": 6.578334946028089e-07, "loss": 0.00029714283300563693, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2710, "train_speed(iter/s)": 0.028121 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 243.0, "completions/mean_length": 100.66666984558105, "completions/min_length": 40.25, "epoch": 4.040208488458674, "grad_norm": 0.004006016789656797, "kl": 0.31005859375, "learning_rate": 6.576090744307865e-07, "loss": 0.0003104261704720557, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2711, "train_speed(iter/s)": 0.028121 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 254.5, "completions/mean_length": 112.12500381469727, "completions/min_length": 35.75, "epoch": 4.041697691734922, "grad_norm": 0.0034289577119935495, "kl": 0.2958984375, "learning_rate": 6.573846189986904e-07, "loss": 0.0002961692225653678, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2712, "train_speed(iter/s)": 0.028121 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 270.75, "completions/mean_length": 113.17708587646484, "completions/min_length": 32.25, "epoch": 4.043186895011169, "grad_norm": 0.003969878680923413, "kl": 0.28857421875, "learning_rate": 6.57160128356736e-07, "loss": 0.0002887967275455594, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2713, "train_speed(iter/s)": 0.028118 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 323.0, "completions/mean_length": 118.66666984558105, "completions/min_length": 39.25, "epoch": 4.044676098287416, "grad_norm": 0.0035065357354930634, "kl": 0.29150390625, "learning_rate": 6.569356025551454e-07, "loss": 0.000290569820208475, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2714, "train_speed(iter/s)": 0.028115 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 250.5, "completions/mean_length": 106.06250190734863, "completions/min_length": 26.25, "epoch": 4.046165301563663, "grad_norm": 0.0034287929445808193, "kl": 0.318359375, "learning_rate": 6.567110416441495e-07, "loss": 0.00031826263875700533, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2715, "train_speed(iter/s)": 0.028112 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 244.75, "completions/mean_length": 108.58333587646484, "completions/min_length": 50.5, "epoch": 4.047654504839911, "grad_norm": 0.03686783338892224, "kl": 0.3115234375, "learning_rate": 6.564864456739867e-07, "loss": 0.000311216339468956, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2716, "train_speed(iter/s)": 0.028114 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 303.25, "completions/mean_length": 126.64583969116211, "completions/min_length": 42.5, "epoch": 4.049143708116158, "grad_norm": 0.003968670971265529, "kl": 0.28466796875, "learning_rate": 6.562618146949031e-07, "loss": 0.00028448691591620445, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2717, "train_speed(iter/s)": 0.028111 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 275.5, "completions/mean_length": 123.38541793823242, "completions/min_length": 44.25, "epoch": 4.050632911392405, "grad_norm": 0.003755527641294408, "kl": 0.29052734375, "learning_rate": 6.560371487571532e-07, "loss": 0.0002902006381191313, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2718, "train_speed(iter/s)": 0.028111 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 271.75, "completions/mean_length": 109.48958778381348, "completions/min_length": 49.0, "epoch": 4.052122114668652, "grad_norm": 1.7008945855310522, "kl": 0.30615234375, "learning_rate": 6.558124479109986e-07, "loss": -0.010350177064538002, "memory(GiB)": 112.53, "reward": 1.78125, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.78125, "rewards/CineAccuracyORM/std": 0.19503945857286453, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2719, "train_speed(iter/s)": 0.028108 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 278.75, "completions/mean_length": 127.4687557220459, "completions/min_length": 38.75, "epoch": 4.0536113179449, "grad_norm": 0.004477980737095448, "kl": 0.271728515625, "learning_rate": 6.555877122067093e-07, "loss": 0.00027169735403731465, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2720, "train_speed(iter/s)": 0.02811 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 412.0, "completions/mean_length": 142.88541984558105, "completions/min_length": 39.0, "epoch": 4.055100521221147, "grad_norm": 0.5684674454777991, "kl": 0.25634765625, "learning_rate": 6.553629416945627e-07, "loss": -0.024271374568343163, "memory(GiB)": 112.53, "reward": 1.6458334028720856, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6458333507180214, "rewards/CineAccuracyORM/std": 0.43174003064632416, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2721, "train_speed(iter/s)": 0.02811 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 267.5, "completions/mean_length": 128.04166793823242, "completions/min_length": 42.5, "epoch": 4.056589724497394, "grad_norm": 1.2805117725477841, "kl": 0.266357421875, "learning_rate": 6.55138136424844e-07, "loss": 0.022817473858594894, "memory(GiB)": 112.53, "reward": 1.4895834028720856, "reward_std": 0.06803862750530243, "rewards/CineAccuracyORM/mean": 0.4895833507180214, "rewards/CineAccuracyORM/std": 0.47723397612571716, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2722, "train_speed(iter/s)": 0.028112 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 359.0, "completions/mean_length": 132.625, "completions/min_length": 51.25, "epoch": 4.058078927773641, "grad_norm": 0.004120918148930413, "kl": 0.240478515625, "learning_rate": 6.549132964478465e-07, "loss": 0.00024048867635428905, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2723, "train_speed(iter/s)": 0.028114 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 296.0, "completions/mean_length": 130.3645896911621, "completions/min_length": 45.5, "epoch": 4.059568131049888, "grad_norm": 1.1990548771604506, "kl": 0.26025390625, "learning_rate": 6.546884218138711e-07, "loss": -0.027109745889902115, "memory(GiB)": 112.53, "reward": 1.729166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.729166679084301, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2724, "train_speed(iter/s)": 0.028113 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 308.25, "completions/mean_length": 119.15625190734863, "completions/min_length": 45.5, "epoch": 4.061057334326136, "grad_norm": 0.005013681841672494, "kl": 0.275390625, "learning_rate": 6.544635125732263e-07, "loss": 0.00027531004161573946, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2725, "train_speed(iter/s)": 0.028112 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 246.5, "completions/mean_length": 120.6875057220459, "completions/min_length": 48.0, "epoch": 4.062546537602382, "grad_norm": 0.7885915476150052, "kl": 0.26611328125, "learning_rate": 6.542385687762287e-07, "loss": -0.0054956283420324326, "memory(GiB)": 112.53, "reward": 1.8437500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2726, "train_speed(iter/s)": 0.028112 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 341.75, "completions/mean_length": 130.10416984558105, "completions/min_length": 48.5, "epoch": 4.06403574087863, "grad_norm": 1.4104998514104856, "kl": 0.262939453125, "learning_rate": 6.540135904732018e-07, "loss": -0.030495604500174522, "memory(GiB)": 112.53, "reward": 1.6250000596046448, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.6250000111758709, "rewards/CineAccuracyORM/std": 0.33594508469104767, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2727, "train_speed(iter/s)": 0.028108 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 290.5, "completions/mean_length": 128.6666717529297, "completions/min_length": 47.0, "epoch": 4.065524944154877, "grad_norm": 0.7298858926613164, "kl": 0.262451171875, "learning_rate": 6.537885777144783e-07, "loss": -0.020735669881105423, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833507180214, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2728, "train_speed(iter/s)": 0.028107 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 268.75, "completions/mean_length": 127.47917175292969, "completions/min_length": 50.75, "epoch": 4.0670141474311245, "grad_norm": 1.6560340956883737, "kl": 0.256103515625, "learning_rate": 6.53563530550397e-07, "loss": 0.01801055669784546, "memory(GiB)": 112.53, "reward": 1.8958333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2729, "train_speed(iter/s)": 0.028104 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 407.0, "completions/mean_length": 131.08333778381348, "completions/min_length": 45.5, "epoch": 4.068503350707371, "grad_norm": 1.4268083161148577, "kl": 0.2724609375, "learning_rate": 6.533384490313054e-07, "loss": 0.029865950345993042, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2730, "train_speed(iter/s)": 0.028099 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 306.5, "completions/mean_length": 121.18750381469727, "completions/min_length": 49.0, "epoch": 4.069992553983619, "grad_norm": 0.9844128934702531, "kl": 0.26904296875, "learning_rate": 6.531133332075586e-07, "loss": 0.010909616015851498, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2731, "train_speed(iter/s)": 0.028096 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 270.75, "completions/mean_length": 122.33333969116211, "completions/min_length": 42.0, "epoch": 4.071481757259866, "grad_norm": 1.0674632782193734, "kl": 0.2802734375, "learning_rate": 6.528881831295188e-07, "loss": -0.02626706473529339, "memory(GiB)": 112.53, "reward": 1.6041667461395264, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6041666865348816, "rewards/CineAccuracyORM/std": 0.4870600998401642, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2732, "train_speed(iter/s)": 0.028093 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 295.25, "completions/mean_length": 115.63541793823242, "completions/min_length": 46.5, "epoch": 4.0729709605361135, "grad_norm": 2.420146225090754, "kl": 0.3193359375, "learning_rate": 6.526629988475566e-07, "loss": 0.0029032069724053144, "memory(GiB)": 112.53, "reward": 1.604166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.604166679084301, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2733, "train_speed(iter/s)": 0.028092 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 283.25, "completions/mean_length": 124.86458778381348, "completions/min_length": 52.25, "epoch": 4.07446016381236, "grad_norm": 0.00371024818635318, "kl": 0.27685546875, "learning_rate": 6.524377804120497e-07, "loss": 0.0002763587690424174, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2734, "train_speed(iter/s)": 0.028095 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 317.25, "completions/mean_length": 136.6354217529297, "completions/min_length": 48.75, "epoch": 4.075949367088608, "grad_norm": 0.003707830503734793, "kl": 0.268310546875, "learning_rate": 6.522125278733835e-07, "loss": 0.0002679177559912205, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2735, "train_speed(iter/s)": 0.028092 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 369.25, "completions/mean_length": 135.37500190734863, "completions/min_length": 41.25, "epoch": 4.077438570364855, "grad_norm": 0.004056863315631022, "kl": 0.279296875, "learning_rate": 6.519872412819515e-07, "loss": 0.00027956615667790174, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2736, "train_speed(iter/s)": 0.028086 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 308.0, "completions/mean_length": 123.89583969116211, "completions/min_length": 37.5, "epoch": 4.078927773641102, "grad_norm": 0.9680626010562253, "kl": 0.272705078125, "learning_rate": 6.517619206881544e-07, "loss": -0.015132236294448376, "memory(GiB)": 112.53, "reward": 1.541666716337204, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.5416666865348816, "rewards/CineAccuracyORM/std": 0.45633094012737274, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2737, "train_speed(iter/s)": 0.028085 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 290.75, "completions/mean_length": 123.85417175292969, "completions/min_length": 46.25, "epoch": 4.080416976917349, "grad_norm": 2.127118078257928, "kl": 0.2890625, "learning_rate": 6.515365661424006e-07, "loss": -0.001877465983852744, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2738, "train_speed(iter/s)": 0.028085 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 328.25, "completions/mean_length": 132.23958778381348, "completions/min_length": 53.75, "epoch": 4.081906180193596, "grad_norm": 0.003637623688769683, "kl": 0.28564453125, "learning_rate": 6.51311177695106e-07, "loss": 0.0002859306987375021, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2739, "train_speed(iter/s)": 0.028087 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 352.5, "completions/mean_length": 130.69791793823242, "completions/min_length": 47.25, "epoch": 4.083395383469844, "grad_norm": 0.004717328133235926, "kl": 0.2724609375, "learning_rate": 6.510857553966941e-07, "loss": 0.00027289980789646506, "memory(GiB)": 112.53, "reward": 1.5000000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2740, "train_speed(iter/s)": 0.028089 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 385.75, "completions/mean_length": 147.81250381469727, "completions/min_length": 43.75, "epoch": 4.084884586746091, "grad_norm": 0.004271360256113896, "kl": 0.2763671875, "learning_rate": 6.508602992975962e-07, "loss": 0.0002760774805210531, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2741, "train_speed(iter/s)": 0.028087 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 295.75, "completions/mean_length": 130.48958778381348, "completions/min_length": 47.25, "epoch": 4.086373790022338, "grad_norm": 0.005508560259749275, "kl": 0.2666015625, "learning_rate": 6.506348094482509e-07, "loss": 0.000266348069999367, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2742, "train_speed(iter/s)": 0.028086 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 389.75, "completions/mean_length": 141.32292366027832, "completions/min_length": 47.0, "epoch": 4.087862993298585, "grad_norm": 0.0033428555110726174, "kl": 0.26171875, "learning_rate": 6.504092858991046e-07, "loss": 0.00026092203916050494, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2743, "train_speed(iter/s)": 0.028084 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 311.0, "completions/mean_length": 159.6770896911621, "completions/min_length": 49.5, "epoch": 4.089352196574833, "grad_norm": 0.012838433622092114, "kl": 0.251220703125, "learning_rate": 6.501837287006111e-07, "loss": 0.0002514052321203053, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2744, "train_speed(iter/s)": 0.028077 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 320.25, "completions/mean_length": 148.21875381469727, "completions/min_length": 47.5, "epoch": 4.0908413998510795, "grad_norm": 0.003752443443155949, "kl": 0.2451171875, "learning_rate": 6.499581379032318e-07, "loss": 0.0002448821614962071, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2745, "train_speed(iter/s)": 0.028079 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 334.25, "completions/mean_length": 132.37500381469727, "completions/min_length": 56.0, "epoch": 4.092330603127327, "grad_norm": 0.004124582672077434, "kl": 0.28515625, "learning_rate": 6.497325135574351e-07, "loss": 0.0002852778707165271, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2746, "train_speed(iter/s)": 0.028081 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 471.75, "completions/mean_length": 152.81250381469727, "completions/min_length": 49.75, "epoch": 4.093819806403574, "grad_norm": 0.003861276561159241, "kl": 0.250732421875, "learning_rate": 6.495068557136978e-07, "loss": 0.00025041267508640885, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2747, "train_speed(iter/s)": 0.028075 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 370.75, "completions/mean_length": 140.6145896911621, "completions/min_length": 43.75, "epoch": 4.095309009679822, "grad_norm": 0.003837277516526197, "kl": 0.267333984375, "learning_rate": 6.492811644225036e-07, "loss": 0.00026751525001600385, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2748, "train_speed(iter/s)": 0.028074 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 347.0, "completions/mean_length": 146.3020896911621, "completions/min_length": 36.0, "epoch": 4.0967982129560685, "grad_norm": 0.8445847898489075, "kl": 0.2724609375, "learning_rate": 6.490554397343438e-07, "loss": -0.0017508211312815547, "memory(GiB)": 112.53, "reward": 1.760416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166865348816, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2749, "train_speed(iter/s)": 0.028067 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 380.25, "completions/mean_length": 165.18750762939453, "completions/min_length": 60.0, "epoch": 4.098287416232315, "grad_norm": 0.0031971649860173127, "kl": 0.237060546875, "learning_rate": 6.488296816997173e-07, "loss": 0.00023703501210547984, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2750, "train_speed(iter/s)": 0.028063 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 292.75, "completions/mean_length": 134.19792366027832, "completions/min_length": 43.25, "epoch": 4.099776619508563, "grad_norm": 0.0041660626556917005, "kl": 0.27001953125, "learning_rate": 6.486038903691302e-07, "loss": 0.0002699038595892489, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2751, "train_speed(iter/s)": 0.028065 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 331.5, "completions/mean_length": 133.25000190734863, "completions/min_length": 58.5, "epoch": 4.10126582278481, "grad_norm": 0.4962126915226718, "kl": 0.271484375, "learning_rate": 6.483780657930964e-07, "loss": 0.024517929181456566, "memory(GiB)": 112.53, "reward": 1.5208333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.5208333414047956, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2752, "train_speed(iter/s)": 0.028065 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 378.5, "completions/mean_length": 148.37500762939453, "completions/min_length": 40.25, "epoch": 4.1027550260610575, "grad_norm": 0.0032557770419223597, "kl": 0.25, "learning_rate": 6.48152208022137e-07, "loss": 0.00024991441750898957, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2753, "train_speed(iter/s)": 0.028063 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 320.25, "completions/mean_length": 146.02083778381348, "completions/min_length": 44.0, "epoch": 4.104244229337304, "grad_norm": 0.0041128825790235435, "kl": 0.254638671875, "learning_rate": 6.479263171067804e-07, "loss": 0.000254751939792186, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2754, "train_speed(iter/s)": 0.028063 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 377.25, "completions/mean_length": 150.06250190734863, "completions/min_length": 44.75, "epoch": 4.105733432613552, "grad_norm": 0.0907625961330948, "kl": 0.289306640625, "learning_rate": 6.477003930975627e-07, "loss": 0.0002887907612603158, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2755, "train_speed(iter/s)": 0.028061 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 331.5, "completions/mean_length": 137.4583396911621, "completions/min_length": 40.0, "epoch": 4.107222635889799, "grad_norm": 0.0037391945986512353, "kl": 0.272216796875, "learning_rate": 6.474744360450274e-07, "loss": 0.00027233161381445825, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2756, "train_speed(iter/s)": 0.028058 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 342.5, "completions/mean_length": 144.8333396911621, "completions/min_length": 49.0, "epoch": 4.108711839166046, "grad_norm": 0.08713227228523425, "kl": 0.29638671875, "learning_rate": 6.472484459997251e-07, "loss": 0.0002963139850180596, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2757, "train_speed(iter/s)": 0.028054 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 353.5, "completions/mean_length": 145.44792556762695, "completions/min_length": 43.5, "epoch": 4.110201042442293, "grad_norm": 1.5277008652268853, "kl": 0.28173828125, "learning_rate": 6.470224230122142e-07, "loss": 0.034409862011671066, "memory(GiB)": 112.53, "reward": 1.5208333730697632, "reward_std": 0.09750140458345413, "rewards/CineAccuracyORM/mean": 0.5208333432674408, "rewards/CineAccuracyORM/std": 0.49355606734752655, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2758, "train_speed(iter/s)": 0.028051 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 433.5, "completions/mean_length": 136.41667366027832, "completions/min_length": 38.5, "epoch": 4.111690245718541, "grad_norm": 0.9882562784063038, "kl": 0.2763671875, "learning_rate": 6.4679636713306e-07, "loss": 0.009743919596076012, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 0.9895833432674408, "rewards/Format/std": 0.05103103443980217, "step": 2759, "train_speed(iter/s)": 0.028048 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 355.5, "completions/mean_length": 126.54166984558105, "completions/min_length": 20.25, "epoch": 4.113179448994788, "grad_norm": 1.36631484369973, "kl": 0.26708984375, "learning_rate": 6.465702784128354e-07, "loss": -0.0037497261073440313, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833507180214, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2760, "train_speed(iter/s)": 0.028044 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 353.25, "completions/mean_length": 147.46875381469727, "completions/min_length": 63.0, "epoch": 4.114668652271035, "grad_norm": 0.6890695344442046, "kl": 0.2470703125, "learning_rate": 6.463441569021207e-07, "loss": 0.02453605644404888, "memory(GiB)": 112.53, "reward": 1.5208333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.5208333432674408, "rewards/CineAccuracyORM/std": 0.4717390313744545, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2761, "train_speed(iter/s)": 0.028046 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 321.5, "completions/mean_length": 141.36458587646484, "completions/min_length": 45.25, "epoch": 4.116157855547282, "grad_norm": 0.0044770757942651975, "kl": 0.25732421875, "learning_rate": 6.461180026515038e-07, "loss": 0.00025749902124516666, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2762, "train_speed(iter/s)": 0.028047 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 314.0, "completions/mean_length": 134.69791984558105, "completions/min_length": 42.25, "epoch": 4.117647058823529, "grad_norm": 0.003630732203307717, "kl": 0.275390625, "learning_rate": 6.45891815711579e-07, "loss": 0.0002754841116257012, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2763, "train_speed(iter/s)": 0.028044 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 276.5, "completions/mean_length": 118.00000381469727, "completions/min_length": 50.25, "epoch": 4.119136262099777, "grad_norm": 0.003965963760537683, "kl": 0.276611328125, "learning_rate": 6.456655961329491e-07, "loss": 0.0002766760590020567, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2764, "train_speed(iter/s)": 0.028046 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 355.25, "completions/mean_length": 131.29166793823242, "completions/min_length": 41.75, "epoch": 4.1206254653760235, "grad_norm": 1.0434491266651094, "kl": 0.25048828125, "learning_rate": 6.454393439662229e-07, "loss": 0.005587979219853878, "memory(GiB)": 112.53, "reward": 1.8645833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8645833432674408, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2765, "train_speed(iter/s)": 0.028048 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 285.5, "completions/mean_length": 113.17708969116211, "completions/min_length": 33.25, "epoch": 4.122114668652271, "grad_norm": 3.572280502763282, "kl": 0.3505859375, "learning_rate": 6.45213059262018e-07, "loss": 0.05069075524806976, "memory(GiB)": 112.53, "reward": 1.8020833432674408, "reward_std": 0.07400632463395596, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.24376489222049713, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2766, "train_speed(iter/s)": 0.02805 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 315.75, "completions/mean_length": 109.93750381469727, "completions/min_length": 37.0, "epoch": 4.123603871928518, "grad_norm": 0.8457031213750322, "kl": 0.29541015625, "learning_rate": 6.449867420709578e-07, "loss": -0.011102491989731789, "memory(GiB)": 112.53, "reward": 1.7708333432674408, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333358168602, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2767, "train_speed(iter/s)": 0.028049 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 276.5, "completions/mean_length": 108.08333587646484, "completions/min_length": 43.5, "epoch": 4.125093075204766, "grad_norm": 0.005690096428807975, "kl": 0.3076171875, "learning_rate": 6.447603924436743e-07, "loss": 0.0003076803404837847, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2768, "train_speed(iter/s)": 0.028049 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 272.0, "completions/mean_length": 116.58333587646484, "completions/min_length": 51.75, "epoch": 4.1265822784810124, "grad_norm": 0.32088167144207114, "kl": 0.3564453125, "learning_rate": 6.445340104308057e-07, "loss": 0.00035635678796097636, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2769, "train_speed(iter/s)": 0.028048 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 254.0, "completions/mean_length": 120.08333587646484, "completions/min_length": 47.0, "epoch": 4.12807148175726, "grad_norm": 0.0036150864521627447, "kl": 0.27978515625, "learning_rate": 6.443075960829977e-07, "loss": 0.0002788605052046478, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2770, "train_speed(iter/s)": 0.028045 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 251.0, "completions/mean_length": 109.98958587646484, "completions/min_length": 36.25, "epoch": 4.129560685033507, "grad_norm": 0.058058111048150944, "kl": 0.31689453125, "learning_rate": 6.440811494509039e-07, "loss": 0.0003174185403622687, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2771, "train_speed(iter/s)": 0.028048 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 290.5, "completions/mean_length": 125.86458969116211, "completions/min_length": 34.5, "epoch": 4.131049888309755, "grad_norm": 0.003785967303716612, "kl": 0.274169921875, "learning_rate": 6.438546705851843e-07, "loss": 0.0002740665222518146, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2772, "train_speed(iter/s)": 0.028045 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 368.5, "completions/mean_length": 117.41667175292969, "completions/min_length": 40.0, "epoch": 4.132539091586001, "grad_norm": 1.2811636153134427, "kl": 0.28662109375, "learning_rate": 6.436281595365065e-07, "loss": -0.004131180699914694, "memory(GiB)": 112.53, "reward": 1.7187500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500149011612, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2773, "train_speed(iter/s)": 0.028046 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 223.5, "completions/mean_length": 98.72916793823242, "completions/min_length": 29.5, "epoch": 4.134028294862249, "grad_norm": 1.2167353376210974, "kl": 0.3203125, "learning_rate": 6.434016163555451e-07, "loss": 0.0014738885220140219, "memory(GiB)": 112.53, "reward": 1.8750000298023224, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.8750000149011612, "rewards/CineAccuracyORM/std": 0.2558748833835125, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2774, "train_speed(iter/s)": 0.028049 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 299.75, "completions/mean_length": 105.90625190734863, "completions/min_length": 26.25, "epoch": 4.135517498138496, "grad_norm": 0.8520873338897967, "kl": 0.29541015625, "learning_rate": 6.431750410929821e-07, "loss": 0.002800963819026947, "memory(GiB)": 112.53, "reward": 1.8020833432674408, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.20484384894371033, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2775, "train_speed(iter/s)": 0.028049 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 236.25, "completions/mean_length": 110.93750190734863, "completions/min_length": 33.0, "epoch": 4.137006701414743, "grad_norm": 0.003931074358888995, "kl": 0.30908203125, "learning_rate": 6.429484337995067e-07, "loss": 0.0003096314030699432, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2776, "train_speed(iter/s)": 0.028049 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 309.5, "completions/mean_length": 109.82291793823242, "completions/min_length": 42.75, "epoch": 4.13849590469099, "grad_norm": 0.004954837896669291, "kl": 0.3046875, "learning_rate": 6.427217945258149e-07, "loss": 0.00030467903707176447, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2777, "train_speed(iter/s)": 0.028042 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 262.5, "completions/mean_length": 99.59375381469727, "completions/min_length": 14.75, "epoch": 4.139985107967237, "grad_norm": 0.004029666932781348, "kl": 0.31494140625, "learning_rate": 6.424951233226104e-07, "loss": 0.00031426839996129274, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2778, "train_speed(iter/s)": 0.02804 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 285.75, "completions/mean_length": 107.60416984558105, "completions/min_length": 38.25, "epoch": 4.141474311243485, "grad_norm": 0.0038585143665705677, "kl": 0.294921875, "learning_rate": 6.422684202406035e-07, "loss": 0.0002952414215542376, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2779, "train_speed(iter/s)": 0.028042 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 265.0, "completions/mean_length": 104.26042175292969, "completions/min_length": 31.75, "epoch": 4.142963514519732, "grad_norm": 0.004032904251778041, "kl": 0.310546875, "learning_rate": 6.420416853305119e-07, "loss": 0.0003106370859313756, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2780, "train_speed(iter/s)": 0.028042 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 258.5, "completions/mean_length": 104.37500190734863, "completions/min_length": 35.25, "epoch": 4.144452717795979, "grad_norm": 0.008071276000974072, "kl": 0.2861328125, "learning_rate": 6.418149186430607e-07, "loss": 0.00028644659323617816, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2781, "train_speed(iter/s)": 0.028042 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 347.0, "completions/mean_length": 121.94792175292969, "completions/min_length": 52.5, "epoch": 4.145941921072226, "grad_norm": 0.00372472586179269, "kl": 0.28759765625, "learning_rate": 6.415881202289816e-07, "loss": 0.00028794497484341264, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2782, "train_speed(iter/s)": 0.02804 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 267.25, "completions/mean_length": 110.18750190734863, "completions/min_length": 18.75, "epoch": 4.147431124348474, "grad_norm": 0.005393915029110778, "kl": 0.31103515625, "learning_rate": 6.413612901390136e-07, "loss": 0.00031143613159656525, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2783, "train_speed(iter/s)": 0.028042 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 270.75, "completions/mean_length": 107.65625190734863, "completions/min_length": 43.25, "epoch": 4.148920327624721, "grad_norm": 0.0041572605752485045, "kl": 0.302734375, "learning_rate": 6.411344284239028e-07, "loss": 0.00030298432102426887, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2784, "train_speed(iter/s)": 0.028037 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 256.75, "completions/mean_length": 109.19791793823242, "completions/min_length": 32.0, "epoch": 4.150409530900968, "grad_norm": 0.004341058717135886, "kl": 0.30126953125, "learning_rate": 6.409075351344022e-07, "loss": 0.0003013546811416745, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2785, "train_speed(iter/s)": 0.028031 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 286.5, "completions/mean_length": 105.50000381469727, "completions/min_length": 35.25, "epoch": 4.151898734177215, "grad_norm": 0.004567685728975862, "kl": 0.29638671875, "learning_rate": 6.406806103212724e-07, "loss": 0.0002965755993500352, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2786, "train_speed(iter/s)": 0.028033 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 289.5, "completions/mean_length": 109.71875190734863, "completions/min_length": 32.0, "epoch": 4.153387937453463, "grad_norm": 0.004828302779774241, "kl": 0.29736328125, "learning_rate": 6.404536540352805e-07, "loss": 0.0002976791001856327, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2787, "train_speed(iter/s)": 0.02803 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 295.25, "completions/mean_length": 116.94791984558105, "completions/min_length": 42.5, "epoch": 4.15487714072971, "grad_norm": 0.0047628939209753305, "kl": 0.29150390625, "learning_rate": 6.402266663272009e-07, "loss": 0.00029112008633092046, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2788, "train_speed(iter/s)": 0.028027 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.75, "completions/mean_length": 107.01041793823242, "completions/min_length": 41.75, "epoch": 4.156366344005956, "grad_norm": 0.004260332229270272, "kl": 0.30078125, "learning_rate": 6.399996472478148e-07, "loss": 0.00030078983400017023, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2789, "train_speed(iter/s)": 0.028027 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 261.0, "completions/mean_length": 116.90625381469727, "completions/min_length": 36.75, "epoch": 4.157855547282204, "grad_norm": 0.003959215896512575, "kl": 0.296875, "learning_rate": 6.39772596847911e-07, "loss": 0.00029619672568514943, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2790, "train_speed(iter/s)": 0.028021 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 199.5, "completions/mean_length": 102.67708587646484, "completions/min_length": 38.5, "epoch": 4.159344750558451, "grad_norm": 1.6475339813325791, "kl": 0.30419921875, "learning_rate": 6.395455151782844e-07, "loss": -0.0057607414200901985, "memory(GiB)": 112.53, "reward": 1.5937500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500223517418, "rewards/CineAccuracyORM/std": 0.47723397612571716, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2791, "train_speed(iter/s)": 0.028024 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.5, "completions/mean_length": 101.20833587646484, "completions/min_length": 34.0, "epoch": 4.160833953834699, "grad_norm": 2.7458121062233456, "kl": 0.3662109375, "learning_rate": 6.393184022897375e-07, "loss": 0.015517737716436386, "memory(GiB)": 112.53, "reward": 1.447916716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.447916679084301, "rewards/CineAccuracyORM/std": 0.464870348572731, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2792, "train_speed(iter/s)": 0.028022 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 231.5, "completions/mean_length": 112.37500381469727, "completions/min_length": 43.5, "epoch": 4.162323157110945, "grad_norm": 0.004128356023038034, "kl": 0.30029296875, "learning_rate": 6.390912582330797e-07, "loss": 0.0003003720776177943, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2793, "train_speed(iter/s)": 0.028025 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.0, "completions/mean_length": 104.81250190734863, "completions/min_length": 31.5, "epoch": 4.163812360387193, "grad_norm": 0.0048226910073136744, "kl": 0.30029296875, "learning_rate": 6.388640830591274e-07, "loss": 0.00029989518225193024, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2794, "train_speed(iter/s)": 0.028025 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 220.5, "completions/mean_length": 105.14583587646484, "completions/min_length": 26.25, "epoch": 4.16530156366344, "grad_norm": 0.004004166590597993, "kl": 0.32373046875, "learning_rate": 6.386368768187039e-07, "loss": 0.0003238657955080271, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2795, "train_speed(iter/s)": 0.028028 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.0, "completions/mean_length": 93.08333396911621, "completions/min_length": 29.0, "epoch": 4.1667907669396875, "grad_norm": 0.003812501508336637, "kl": 0.3271484375, "learning_rate": 6.384096395626395e-07, "loss": 0.00032681177253834903, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2796, "train_speed(iter/s)": 0.028026 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 259.5, "completions/mean_length": 105.61458587646484, "completions/min_length": 33.25, "epoch": 4.168279970215934, "grad_norm": 1.5431729488770682, "kl": 0.384765625, "learning_rate": 6.381823713417713e-07, "loss": -0.0002116375690093264, "memory(GiB)": 112.53, "reward": 1.6145833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6145833432674408, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2797, "train_speed(iter/s)": 0.028026 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.0, "completions/mean_length": 99.38541793823242, "completions/min_length": 31.5, "epoch": 4.169769173492182, "grad_norm": 1.6775662381265755, "kl": 0.30419921875, "learning_rate": 6.379550722069432e-07, "loss": -0.048223480582237244, "memory(GiB)": 112.53, "reward": 1.875, "reward_std": 0.0725918859243393, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.1276884824037552, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2798, "train_speed(iter/s)": 0.028023 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 294.25, "completions/mean_length": 110.67708396911621, "completions/min_length": 33.25, "epoch": 4.171258376768429, "grad_norm": 2.0746135775980306, "kl": 0.316650390625, "learning_rate": 6.377277422090066e-07, "loss": -0.017135698348283768, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.3154253140091896, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2799, "train_speed(iter/s)": 0.028023 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 255.5, "completions/mean_length": 109.87500381469727, "completions/min_length": 41.25, "epoch": 4.1727475800446765, "grad_norm": 0.004080502487896426, "kl": 0.30029296875, "learning_rate": 6.375003813988194e-07, "loss": 0.0003007349150720984, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2800, "train_speed(iter/s)": 0.028023 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 321.5, "completions/mean_length": 114.80208778381348, "completions/min_length": 40.75, "epoch": 4.174236783320923, "grad_norm": 0.007127991721531304, "kl": 0.29443359375, "learning_rate": 6.372729898272462e-07, "loss": 0.0002946941531263292, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2801, "train_speed(iter/s)": 0.028017 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 264.5, "completions/mean_length": 111.08333587646484, "completions/min_length": 26.75, "epoch": 4.17572598659717, "grad_norm": 1.4985242251694122, "kl": 0.306640625, "learning_rate": 6.370455675451589e-07, "loss": -0.009491318836808205, "memory(GiB)": 112.53, "reward": 1.6770834028720856, "reward_std": 0.06803862750530243, "rewards/CineAccuracyORM/mean": 0.677083358168602, "rewards/CineAccuracyORM/std": 0.47723397612571716, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2802, "train_speed(iter/s)": 0.028019 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 325.25, "completions/mean_length": 124.34375190734863, "completions/min_length": 50.25, "epoch": 4.177215189873418, "grad_norm": 0.004672559492559356, "kl": 0.28076171875, "learning_rate": 6.368181146034361e-07, "loss": 0.00028007954824715853, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2803, "train_speed(iter/s)": 0.028021 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.0, "completions/mean_length": 107.72916793823242, "completions/min_length": 47.0, "epoch": 4.178704393149665, "grad_norm": 0.7622130090720171, "kl": 0.548828125, "learning_rate": 6.365906310529629e-07, "loss": -0.008066166192293167, "memory(GiB)": 112.53, "reward": 1.572916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.572916679084301, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2804, "train_speed(iter/s)": 0.028021 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 300.25, "completions/mean_length": 129.09375190734863, "completions/min_length": 42.75, "epoch": 4.180193596425912, "grad_norm": 0.0041811315348719005, "kl": 0.27197265625, "learning_rate": 6.36363116944632e-07, "loss": 0.0002724021614994854, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2805, "train_speed(iter/s)": 0.028018 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 255.25, "completions/mean_length": 124.69791984558105, "completions/min_length": 49.5, "epoch": 4.181682799702159, "grad_norm": 0.6646897878553556, "kl": 0.26904296875, "learning_rate": 6.361355723293424e-07, "loss": -0.013179915957152843, "memory(GiB)": 112.53, "reward": 1.760416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166865348816, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2806, "train_speed(iter/s)": 0.028016 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 335.75, "completions/mean_length": 127.14583587646484, "completions/min_length": 53.25, "epoch": 4.183172002978407, "grad_norm": 0.004571456257799811, "kl": 0.27294921875, "learning_rate": 6.35907997258e-07, "loss": 0.00027227538521401584, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2807, "train_speed(iter/s)": 0.028015 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 339.25, "completions/mean_length": 131.2916717529297, "completions/min_length": 58.25, "epoch": 4.1846612062546535, "grad_norm": 1.7906530977580366, "kl": 0.260009765625, "learning_rate": 6.356803917815176e-07, "loss": -0.0006353402859531343, "memory(GiB)": 112.53, "reward": 1.7187500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500149011612, "rewards/CineAccuracyORM/std": 0.4456155598163605, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2808, "train_speed(iter/s)": 0.028013 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 256.5, "completions/mean_length": 112.93750190734863, "completions/min_length": 55.0, "epoch": 4.186150409530901, "grad_norm": 0.004652219033670824, "kl": 0.2861328125, "learning_rate": 6.354527559508148e-07, "loss": 0.0002861579123418778, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2809, "train_speed(iter/s)": 0.028014 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 316.0, "completions/mean_length": 133.73958587646484, "completions/min_length": 45.25, "epoch": 4.187639612807148, "grad_norm": 1.4950631699537098, "kl": 0.71630859375, "learning_rate": 6.35225089816818e-07, "loss": 0.02452090010046959, "memory(GiB)": 112.53, "reward": 1.791666716337204, "reward_std": 0.12169522047042847, "rewards/CineAccuracyORM/mean": 0.7916666865348816, "rewards/CineAccuracyORM/std": 0.3967231586575508, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2810, "train_speed(iter/s)": 0.028007 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 328.5, "completions/mean_length": 132.83333587646484, "completions/min_length": 51.0, "epoch": 4.189128816083396, "grad_norm": 0.005138540708343227, "kl": 0.28125, "learning_rate": 6.3499739343046e-07, "loss": 0.0002808488789014518, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2811, "train_speed(iter/s)": 0.028002 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 256.75, "completions/mean_length": 124.33333778381348, "completions/min_length": 42.0, "epoch": 4.1906180193596425, "grad_norm": 0.004592109789385524, "kl": 0.26513671875, "learning_rate": 6.347696668426812e-07, "loss": 0.0002650176174938679, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2812, "train_speed(iter/s)": 0.027997 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 276.75, "completions/mean_length": 117.58333587646484, "completions/min_length": 39.75, "epoch": 4.19210722263589, "grad_norm": 0.003675593873511241, "kl": 0.29150390625, "learning_rate": 6.345419101044281e-07, "loss": 0.0002916600205935538, "memory(GiB)": 112.53, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2813, "train_speed(iter/s)": 0.027995 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 309.5, "completions/mean_length": 139.7083396911621, "completions/min_length": 39.5, "epoch": 4.193596425912137, "grad_norm": 0.004595206369517625, "kl": 0.27685546875, "learning_rate": 6.343141232666537e-07, "loss": 0.000276760954875499, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2814, "train_speed(iter/s)": 0.027992 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 300.75, "completions/mean_length": 136.17708778381348, "completions/min_length": 54.25, "epoch": 4.195085629188384, "grad_norm": 0.005600889292295463, "kl": 0.26220703125, "learning_rate": 6.340863063803187e-07, "loss": 0.0002624467306304723, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2815, "train_speed(iter/s)": 0.027988 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 334.5, "completions/mean_length": 135.4791717529297, "completions/min_length": 49.75, "epoch": 4.1965748324646315, "grad_norm": 0.006349781709599478, "kl": 0.2578125, "learning_rate": 6.338584594963897e-07, "loss": 0.00025754893431439996, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2816, "train_speed(iter/s)": 0.027985 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 275.0, "completions/mean_length": 121.68750190734863, "completions/min_length": 48.5, "epoch": 4.198064035740878, "grad_norm": 0.004222731851232725, "kl": 0.26953125, "learning_rate": 6.336305826658402e-07, "loss": 0.00026902393437922, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2817, "train_speed(iter/s)": 0.027984 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 337.75, "completions/mean_length": 142.86458587646484, "completions/min_length": 54.5, "epoch": 4.199553239017126, "grad_norm": 0.005420323516674376, "kl": 0.243408203125, "learning_rate": 6.334026759396507e-07, "loss": 0.00024338944058399647, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2818, "train_speed(iter/s)": 0.027978 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 318.75, "completions/mean_length": 130.61458778381348, "completions/min_length": 47.75, "epoch": 4.201042442293373, "grad_norm": 0.006950328623125482, "kl": 0.267578125, "learning_rate": 6.331747393688081e-07, "loss": 0.0002674249990377575, "memory(GiB)": 112.53, "reward": 1.5000000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2819, "train_speed(iter/s)": 0.027979 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 403.25, "completions/mean_length": 143.80208778381348, "completions/min_length": 51.0, "epoch": 4.2025316455696204, "grad_norm": 0.0049562741018220466, "kl": 0.25732421875, "learning_rate": 6.329467730043058e-07, "loss": 0.00025737524265423417, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2820, "train_speed(iter/s)": 0.027977 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 307.0, "completions/mean_length": 129.84375, "completions/min_length": 48.75, "epoch": 4.204020848845867, "grad_norm": 0.00537913003851445, "kl": 0.2802734375, "learning_rate": 6.327187768971445e-07, "loss": 0.00027989211957901716, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2821, "train_speed(iter/s)": 0.027974 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 394.25, "completions/mean_length": 146.9270896911621, "completions/min_length": 38.0, "epoch": 4.205510052122115, "grad_norm": 0.8998676176191847, "kl": 0.25048828125, "learning_rate": 6.32490751098331e-07, "loss": 0.037415605038404465, "memory(GiB)": 112.53, "reward": 1.791666716337204, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7916666865348816, "rewards/CineAccuracyORM/std": 0.33594508469104767, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2822, "train_speed(iter/s)": 0.027967 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 337.5, "completions/mean_length": 132.1041717529297, "completions/min_length": 43.5, "epoch": 4.206999255398362, "grad_norm": 0.004572297020357294, "kl": 0.25439453125, "learning_rate": 6.322626956588786e-07, "loss": 0.00025423322222195566, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2823, "train_speed(iter/s)": 0.027963 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 309.75, "completions/mean_length": 137.65625762939453, "completions/min_length": 55.75, "epoch": 4.208488458674609, "grad_norm": 0.005757402845946149, "kl": 0.26904296875, "learning_rate": 6.320346106298078e-07, "loss": 0.0002688372624106705, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2824, "train_speed(iter/s)": 0.02796 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 278.0, "completions/mean_length": 127.45833587646484, "completions/min_length": 49.0, "epoch": 4.209977661950856, "grad_norm": 0.00528585637371309, "kl": 0.2802734375, "learning_rate": 6.318064960621455e-07, "loss": 0.00027994727133773267, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2825, "train_speed(iter/s)": 0.02796 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 277.0, "completions/mean_length": 130.08333587646484, "completions/min_length": 59.0, "epoch": 4.211466865227104, "grad_norm": 0.006163098446492389, "kl": 0.268310546875, "learning_rate": 6.315783520069251e-07, "loss": 0.0002680903417058289, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2826, "train_speed(iter/s)": 0.027956 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.0, "completions/mean_length": 118.90625381469727, "completions/min_length": 42.75, "epoch": 4.212956068503351, "grad_norm": 0.004664044899500848, "kl": 0.2705078125, "learning_rate": 6.313501785151868e-07, "loss": 0.0002709916443563998, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2827, "train_speed(iter/s)": 0.027955 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 372.25, "completions/mean_length": 143.4895896911621, "completions/min_length": 57.0, "epoch": 4.2144452717795975, "grad_norm": 0.0038913691055622876, "kl": 0.2529296875, "learning_rate": 6.311219756379768e-07, "loss": 0.0002531177597120404, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2828, "train_speed(iter/s)": 0.027953 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 342.5, "completions/mean_length": 146.52083778381348, "completions/min_length": 42.25, "epoch": 4.215934475055845, "grad_norm": 0.6201839502323533, "kl": 0.24951171875, "learning_rate": 6.308937434263488e-07, "loss": -3.371521597728133e-05, "memory(GiB)": 112.53, "reward": 1.7395833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2829, "train_speed(iter/s)": 0.027947 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 359.5, "completions/mean_length": 145.1041717529297, "completions/min_length": 50.0, "epoch": 4.217423678332092, "grad_norm": 0.8564414431926956, "kl": 0.256591796875, "learning_rate": 6.306654819313624e-07, "loss": 0.032498665153980255, "memory(GiB)": 112.53, "reward": 1.7291666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7291666716337204, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2830, "train_speed(iter/s)": 0.027946 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 269.75, "completions/mean_length": 121.57291984558105, "completions/min_length": 31.75, "epoch": 4.21891288160834, "grad_norm": 0.878648080784023, "kl": 0.27880859375, "learning_rate": 6.304371912040839e-07, "loss": 0.0058187684044241905, "memory(GiB)": 112.53, "reward": 1.635416716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.635416679084301, "rewards/CineAccuracyORM/std": 0.34448449313640594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2831, "train_speed(iter/s)": 0.027946 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 372.0, "completions/mean_length": 140.86458587646484, "completions/min_length": 52.5, "epoch": 4.2204020848845865, "grad_norm": 0.6395870677843764, "kl": 0.259033203125, "learning_rate": 6.302088712955862e-07, "loss": -0.003221792634576559, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2832, "train_speed(iter/s)": 0.027944 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 336.5, "completions/mean_length": 144.5520896911621, "completions/min_length": 58.5, "epoch": 4.221891288160834, "grad_norm": 0.004567848954059585, "kl": 0.236572265625, "learning_rate": 6.299805222569486e-07, "loss": 0.00023673821124248207, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2833, "train_speed(iter/s)": 0.027944 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 270.5, "completions/mean_length": 119.30208778381348, "completions/min_length": 38.75, "epoch": 4.223380491437081, "grad_norm": 0.004352875979630472, "kl": 0.27001953125, "learning_rate": 6.297521441392571e-07, "loss": 0.00027035479433834553, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2834, "train_speed(iter/s)": 0.027946 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 355.25, "completions/mean_length": 136.4895896911621, "completions/min_length": 48.5, "epoch": 4.224869694713329, "grad_norm": 0.004788946163232786, "kl": 0.25927734375, "learning_rate": 6.295237369936043e-07, "loss": 0.0002595728437881917, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2835, "train_speed(iter/s)": 0.027943 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 337.5, "completions/mean_length": 143.3229217529297, "completions/min_length": 45.75, "epoch": 4.226358897989575, "grad_norm": 0.6170173577522504, "kl": 0.2568359375, "learning_rate": 6.292953008710887e-07, "loss": 0.00580214336514473, "memory(GiB)": 112.53, "reward": 1.8125000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2836, "train_speed(iter/s)": 0.027939 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 318.0, "completions/mean_length": 123.02083587646484, "completions/min_length": 39.0, "epoch": 4.227848101265823, "grad_norm": 0.004695174089703133, "kl": 0.28173828125, "learning_rate": 6.290668358228162e-07, "loss": 0.0002813751925714314, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2837, "train_speed(iter/s)": 0.027936 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 278.0, "completions/mean_length": 114.81250381469727, "completions/min_length": 51.5, "epoch": 4.22933730454207, "grad_norm": 0.014554297371864549, "kl": 0.2705078125, "learning_rate": 6.288383418998982e-07, "loss": 0.0002709000837057829, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2838, "train_speed(iter/s)": 0.027938 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 240.25, "completions/mean_length": 114.31250381469727, "completions/min_length": 39.25, "epoch": 4.230826507818318, "grad_norm": 0.005036260353068013, "kl": 0.28564453125, "learning_rate": 6.286098191534532e-07, "loss": 0.0002855927450582385, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2839, "train_speed(iter/s)": 0.027936 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 284.75, "completions/mean_length": 122.58333396911621, "completions/min_length": 43.25, "epoch": 4.232315711094564, "grad_norm": 0.9357164405631802, "kl": 0.28369140625, "learning_rate": 6.283812676346063e-07, "loss": 0.011057589203119278, "memory(GiB)": 112.53, "reward": 1.760416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166865348816, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2840, "train_speed(iter/s)": 0.027938 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 338.0, "completions/mean_length": 120.72916984558105, "completions/min_length": 31.5, "epoch": 4.233804914370811, "grad_norm": 0.7156915561034588, "kl": 0.2744140625, "learning_rate": 6.281526873944882e-07, "loss": -0.014156047254800797, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2841, "train_speed(iter/s)": 0.027935 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 285.25, "completions/mean_length": 123.11458778381348, "completions/min_length": 46.5, "epoch": 4.235294117647059, "grad_norm": 0.010387432235741708, "kl": 0.275390625, "learning_rate": 6.279240784842368e-07, "loss": 0.00027514868997968733, "memory(GiB)": 112.53, "reward": 1.5000000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000074505806, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2842, "train_speed(iter/s)": 0.027931 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 251.0, "completions/mean_length": 112.82291984558105, "completions/min_length": 46.5, "epoch": 4.236783320923306, "grad_norm": 0.004674926917095045, "kl": 0.276611328125, "learning_rate": 6.276954409549962e-07, "loss": 0.00027655030135065317, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2843, "train_speed(iter/s)": 0.027931 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.75, "completions/mean_length": 97.81250190734863, "completions/min_length": 14.5, "epoch": 4.238272524199553, "grad_norm": 0.005197287581685892, "kl": 0.3095703125, "learning_rate": 6.274667748579167e-07, "loss": 0.0003092871338594705, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2844, "train_speed(iter/s)": 0.027931 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 350.5, "completions/mean_length": 123.11458396911621, "completions/min_length": 50.0, "epoch": 4.2397617274758, "grad_norm": 0.004415480799944925, "kl": 0.27978515625, "learning_rate": 6.272380802441552e-07, "loss": 0.0002796087064780295, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2845, "train_speed(iter/s)": 0.02793 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 263.25, "completions/mean_length": 126.45833587646484, "completions/min_length": 34.5, "epoch": 4.241250930752048, "grad_norm": 0.004222877571539981, "kl": 0.269775390625, "learning_rate": 6.270093571648751e-07, "loss": 0.00026962035917676985, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2846, "train_speed(iter/s)": 0.02793 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 290.5, "completions/mean_length": 117.91667175292969, "completions/min_length": 41.75, "epoch": 4.242740134028295, "grad_norm": 0.006395922220300394, "kl": 0.26806640625, "learning_rate": 6.267806056712457e-07, "loss": 0.00026776589220389724, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2847, "train_speed(iter/s)": 0.02793 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 291.75, "completions/mean_length": 129.0104217529297, "completions/min_length": 37.25, "epoch": 4.244229337304542, "grad_norm": 1.483905534870892, "kl": 0.257080078125, "learning_rate": 6.265518258144433e-07, "loss": 0.001580632058903575, "memory(GiB)": 112.53, "reward": 1.572916716337204, "reward_std": 0.10205466859042645, "rewards/CineAccuracyORM/mean": 0.5729166939854622, "rewards/CineAccuracyORM/std": 0.48275065422058105, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2848, "train_speed(iter/s)": 0.02793 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 262.25, "completions/mean_length": 120.5000057220459, "completions/min_length": 56.25, "epoch": 4.245718540580789, "grad_norm": 0.0044762039345391845, "kl": 0.27392578125, "learning_rate": 6.263230176456497e-07, "loss": 0.0002740532800089568, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2849, "train_speed(iter/s)": 0.027932 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 256.75, "completions/mean_length": 129.2291717529297, "completions/min_length": 48.75, "epoch": 4.247207743857037, "grad_norm": 0.004610907391170272, "kl": 0.254150390625, "learning_rate": 6.260941812160541e-07, "loss": 0.0002543751324992627, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2850, "train_speed(iter/s)": 0.027932 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 280.0, "completions/mean_length": 121.44792175292969, "completions/min_length": 44.25, "epoch": 4.248696947133284, "grad_norm": 0.007269400593780386, "kl": 0.29541015625, "learning_rate": 6.25865316576851e-07, "loss": 0.0002953944494947791, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2851, "train_speed(iter/s)": 0.02793 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.25, "completions/mean_length": 109.51041793823242, "completions/min_length": 46.75, "epoch": 4.250186150409531, "grad_norm": 1.0023829660478318, "kl": 0.3056640625, "learning_rate": 6.256364237792419e-07, "loss": 0.010575800202786922, "memory(GiB)": 112.53, "reward": 1.6875000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000074505806, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2852, "train_speed(iter/s)": 0.027933 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 229.5, "completions/mean_length": 106.45833396911621, "completions/min_length": 32.75, "epoch": 4.251675353685778, "grad_norm": 0.00419666364023583, "kl": 0.28369140625, "learning_rate": 6.254075028744343e-07, "loss": 0.0002841888344846666, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2853, "train_speed(iter/s)": 0.027934 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.5, "completions/mean_length": 96.92708587646484, "completions/min_length": 23.5, "epoch": 4.253164556962025, "grad_norm": 0.00475461173876674, "kl": 0.3359375, "learning_rate": 6.251785539136421e-07, "loss": 0.0003353946376591921, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2854, "train_speed(iter/s)": 0.027934 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 270.5, "completions/mean_length": 117.36458587646484, "completions/min_length": 46.5, "epoch": 4.254653760238273, "grad_norm": 0.005917938168629833, "kl": 0.2744140625, "learning_rate": 6.249495769480855e-07, "loss": 0.00027445692103356123, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2855, "train_speed(iter/s)": 0.027932 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 208.75, "completions/mean_length": 114.16666984558105, "completions/min_length": 34.75, "epoch": 4.256142963514519, "grad_norm": 0.00778229870863753, "kl": 0.2939453125, "learning_rate": 6.247205720289907e-07, "loss": 0.00029356812592595816, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2856, "train_speed(iter/s)": 0.027932 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 268.75, "completions/mean_length": 119.00000381469727, "completions/min_length": 29.0, "epoch": 4.257632166790767, "grad_norm": 0.007021643050555166, "kl": 0.27783203125, "learning_rate": 6.244915392075903e-07, "loss": 0.00027789518935605884, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2857, "train_speed(iter/s)": 0.027929 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.75, "completions/mean_length": 110.65625381469727, "completions/min_length": 37.0, "epoch": 4.259121370067014, "grad_norm": 0.004696059565852131, "kl": 0.2880859375, "learning_rate": 6.242624785351235e-07, "loss": 0.00028813397511839867, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2858, "train_speed(iter/s)": 0.027929 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 257.5, "completions/mean_length": 122.01041984558105, "completions/min_length": 46.25, "epoch": 4.2606105733432615, "grad_norm": 0.6656124672122199, "kl": 0.28125, "learning_rate": 6.240333900628353e-07, "loss": -0.005976082757115364, "memory(GiB)": 112.53, "reward": 1.5625000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.5625000223517418, "rewards/CineAccuracyORM/std": 0.4870600998401642, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2859, "train_speed(iter/s)": 0.027925 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 297.5, "completions/mean_length": 127.48958778381348, "completions/min_length": 46.0, "epoch": 4.262099776619508, "grad_norm": 0.006114016040519897, "kl": 0.283203125, "learning_rate": 6.23804273841977e-07, "loss": 0.00028320838464424014, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2860, "train_speed(iter/s)": 0.027922 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 263.5, "completions/mean_length": 125.12500381469727, "completions/min_length": 40.0, "epoch": 4.263588979895756, "grad_norm": 0.00541923257353236, "kl": 0.268798828125, "learning_rate": 6.235751299238059e-07, "loss": 0.0002686146763153374, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2861, "train_speed(iter/s)": 0.027921 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 301.0, "completions/mean_length": 134.46875381469727, "completions/min_length": 50.0, "epoch": 4.265078183172003, "grad_norm": 0.004950466054038434, "kl": 0.2705078125, "learning_rate": 6.233459583595861e-07, "loss": 0.00027007178869098425, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2862, "train_speed(iter/s)": 0.027919 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.5, "completions/mean_length": 110.44791793823242, "completions/min_length": 40.5, "epoch": 4.2665673864482505, "grad_norm": 0.004087660321853229, "kl": 0.27734375, "learning_rate": 6.231167592005875e-07, "loss": 0.0002774203894659877, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2863, "train_speed(iter/s)": 0.027921 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 253.0, "completions/mean_length": 125.10417175292969, "completions/min_length": 49.0, "epoch": 4.268056589724497, "grad_norm": 0.005306657362500847, "kl": 0.260009765625, "learning_rate": 6.228875324980862e-07, "loss": 0.00026080149109475315, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2864, "train_speed(iter/s)": 0.027916 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 248.75, "completions/mean_length": 117.44791984558105, "completions/min_length": 35.75, "epoch": 4.269545793000745, "grad_norm": 0.0044217768230876255, "kl": 0.27294921875, "learning_rate": 6.226582783033642e-07, "loss": 0.0002730879350565374, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2865, "train_speed(iter/s)": 0.027919 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 223.75, "completions/mean_length": 118.11458587646484, "completions/min_length": 48.25, "epoch": 4.271034996276992, "grad_norm": 0.007775780092437108, "kl": 0.2890625, "learning_rate": 6.224289966677103e-07, "loss": 0.0002881958498619497, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2866, "train_speed(iter/s)": 0.027916 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 271.75, "completions/mean_length": 114.03125190734863, "completions/min_length": 49.25, "epoch": 4.272524199553239, "grad_norm": 0.004613172981076834, "kl": 0.2939453125, "learning_rate": 6.221996876424185e-07, "loss": 0.0002936369273811579, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2867, "train_speed(iter/s)": 0.027913 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 285.0, "completions/mean_length": 116.72916984558105, "completions/min_length": 51.5, "epoch": 4.274013402829486, "grad_norm": 0.005321529267151808, "kl": 0.28271484375, "learning_rate": 6.2197035127879e-07, "loss": 0.0002829057921189815, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2868, "train_speed(iter/s)": 0.027915 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 308.75, "completions/mean_length": 133.21875, "completions/min_length": 35.5, "epoch": 4.275502606105733, "grad_norm": 0.007625965775808051, "kl": 0.28564453125, "learning_rate": 6.217409876281315e-07, "loss": 0.00028604670660570264, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2869, "train_speed(iter/s)": 0.027909 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 282.0, "completions/mean_length": 118.20833587646484, "completions/min_length": 45.25, "epoch": 4.276991809381981, "grad_norm": 0.00404536655524829, "kl": 0.2744140625, "learning_rate": 6.215115967417559e-07, "loss": 0.0002745267702266574, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2870, "train_speed(iter/s)": 0.027911 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 274.0, "completions/mean_length": 124.35416984558105, "completions/min_length": 40.75, "epoch": 4.2784810126582276, "grad_norm": 0.9777342127107834, "kl": 0.28466796875, "learning_rate": 6.212821786709819e-07, "loss": -0.014543849043548107, "memory(GiB)": 112.53, "reward": 1.5208333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.5208333432674408, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2871, "train_speed(iter/s)": 0.027912 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 246.0, "completions/mean_length": 124.36458587646484, "completions/min_length": 18.5, "epoch": 4.279970215934475, "grad_norm": 0.004425069208496245, "kl": 0.279296875, "learning_rate": 6.210527334671352e-07, "loss": 0.00027924071764573455, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2872, "train_speed(iter/s)": 0.027912 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 286.75, "completions/mean_length": 125.22916984558105, "completions/min_length": 52.25, "epoch": 4.281459419210722, "grad_norm": 1.1789998668514377, "kl": 0.28369140625, "learning_rate": 6.208232611815463e-07, "loss": 0.0032652574591338634, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.822916679084301, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2873, "train_speed(iter/s)": 0.027911 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.75, "completions/mean_length": 115.67708778381348, "completions/min_length": 52.75, "epoch": 4.28294862248697, "grad_norm": 0.9188396246613763, "kl": 0.277587890625, "learning_rate": 6.205937618655526e-07, "loss": -0.02022239938378334, "memory(GiB)": 112.53, "reward": 1.8437500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2874, "train_speed(iter/s)": 0.027911 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 320.25, "completions/mean_length": 131.50000381469727, "completions/min_length": 49.0, "epoch": 4.2844378257632165, "grad_norm": 2.0645147082385855, "kl": 0.25732421875, "learning_rate": 6.203642355704976e-07, "loss": -0.005488214083015919, "memory(GiB)": 112.53, "reward": 1.8645833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8645833432674408, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2875, "train_speed(iter/s)": 0.027911 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.25, "completions/mean_length": 124.33333778381348, "completions/min_length": 61.5, "epoch": 4.285927029039464, "grad_norm": 1.591319447276175, "kl": 0.25244140625, "learning_rate": 6.201346823477302e-07, "loss": -0.0006931307725608349, "memory(GiB)": 112.53, "reward": 1.8541666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2876, "train_speed(iter/s)": 0.027913 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 262.0, "completions/mean_length": 119.73958587646484, "completions/min_length": 50.0, "epoch": 4.287416232315711, "grad_norm": 0.004141868304520663, "kl": 0.25244140625, "learning_rate": 6.199051022486061e-07, "loss": 0.000252589990850538, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2877, "train_speed(iter/s)": 0.027913 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 285.25, "completions/mean_length": 137.58333587646484, "completions/min_length": 44.0, "epoch": 4.288905435591959, "grad_norm": 0.5363756874403497, "kl": 0.800537109375, "learning_rate": 6.196754953244866e-07, "loss": 0.01759904995560646, "memory(GiB)": 112.53, "reward": 1.9375, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.11058146506547928, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2878, "train_speed(iter/s)": 0.027912 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.25, "completions/mean_length": 119.125, "completions/min_length": 44.25, "epoch": 4.2903946388682055, "grad_norm": 0.005688662216889097, "kl": 0.283447265625, "learning_rate": 6.194458616267387e-07, "loss": 0.0002834860351867974, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2879, "train_speed(iter/s)": 0.027913 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 252.0, "completions/mean_length": 119.40625381469727, "completions/min_length": 36.75, "epoch": 4.291883842144452, "grad_norm": 0.8663141771140567, "kl": 0.2978515625, "learning_rate": 6.192162012067359e-07, "loss": -0.006505192257463932, "memory(GiB)": 112.53, "reward": 1.8541666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.12590253353118896, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2880, "train_speed(iter/s)": 0.027912 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 293.0, "completions/mean_length": 119.37500190734863, "completions/min_length": 52.5, "epoch": 4.2933730454207, "grad_norm": 1.145586694955169, "kl": 0.27392578125, "learning_rate": 6.189865141158574e-07, "loss": 0.009690472856163979, "memory(GiB)": 112.53, "reward": 1.875, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.1276884824037552, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2881, "train_speed(iter/s)": 0.027914 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 237.5, "completions/mean_length": 124.58333587646484, "completions/min_length": 44.0, "epoch": 4.294862248696947, "grad_norm": 0.004530067430361275, "kl": 0.27734375, "learning_rate": 6.187568004054888e-07, "loss": 0.00027741986559703946, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2882, "train_speed(iter/s)": 0.027913 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 280.5, "completions/mean_length": 130.2187557220459, "completions/min_length": 45.5, "epoch": 4.2963514519731945, "grad_norm": 0.004864931230848525, "kl": 0.265625, "learning_rate": 6.18527060127021e-07, "loss": 0.0002658646844793111, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2883, "train_speed(iter/s)": 0.027907 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 241.5, "completions/mean_length": 110.04166793823242, "completions/min_length": 41.75, "epoch": 4.297840655249441, "grad_norm": 0.004505974827297417, "kl": 0.283203125, "learning_rate": 6.182972933318511e-07, "loss": 0.00028331048088148236, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2884, "train_speed(iter/s)": 0.027907 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 241.5, "completions/mean_length": 108.78125190734863, "completions/min_length": 39.75, "epoch": 4.299329858525689, "grad_norm": 0.8054577992077953, "kl": 0.27490234375, "learning_rate": 6.180675000713824e-07, "loss": 0.022317778319120407, "memory(GiB)": 112.53, "reward": 1.6562500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500074505806, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2885, "train_speed(iter/s)": 0.027907 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 265.75, "completions/mean_length": 120.95833778381348, "completions/min_length": 39.0, "epoch": 4.300819061801936, "grad_norm": 0.919036646056549, "kl": 0.3662109375, "learning_rate": 6.178376803970239e-07, "loss": 0.005150836426764727, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.3154253140091896, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2886, "train_speed(iter/s)": 0.027901 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 238.5, "completions/mean_length": 111.00000190734863, "completions/min_length": 40.5, "epoch": 4.302308265078183, "grad_norm": 1.3778282607841537, "kl": 0.28662109375, "learning_rate": 6.176078343601903e-07, "loss": 0.0022502304054796696, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833507180214, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2887, "train_speed(iter/s)": 0.027899 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 306.0, "completions/mean_length": 125.93750381469727, "completions/min_length": 43.25, "epoch": 4.30379746835443, "grad_norm": 0.0038911998357388134, "kl": 0.263671875, "learning_rate": 6.173779620123027e-07, "loss": 0.00026386009994894266, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2888, "train_speed(iter/s)": 0.027899 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 306.75, "completions/mean_length": 114.82292175292969, "completions/min_length": 43.25, "epoch": 4.305286671630678, "grad_norm": 0.004960679789954849, "kl": 0.28662109375, "learning_rate": 6.171480634047879e-07, "loss": 0.0002873237826861441, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2889, "train_speed(iter/s)": 0.027899 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 254.25, "completions/mean_length": 117.20833778381348, "completions/min_length": 35.5, "epoch": 4.306775874906925, "grad_norm": 0.8618250530937436, "kl": 0.296875, "learning_rate": 6.169181385890782e-07, "loss": 0.014335766434669495, "memory(GiB)": 112.53, "reward": 1.96875, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.96875, "rewards/CineAccuracyORM/std": 0.08445799350738525, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2890, "train_speed(iter/s)": 0.027893 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 296.5, "completions/mean_length": 119.38541984558105, "completions/min_length": 41.5, "epoch": 4.308265078183172, "grad_norm": 0.004097922566870045, "kl": 0.2685546875, "learning_rate": 6.166881876166119e-07, "loss": 0.0002684410137590021, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2891, "train_speed(iter/s)": 0.02789 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.75, "completions/mean_length": 103.76041984558105, "completions/min_length": 39.75, "epoch": 4.309754281459419, "grad_norm": 0.009194571194317604, "kl": 0.3037109375, "learning_rate": 6.164582105388337e-07, "loss": 0.0003036820562556386, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2892, "train_speed(iter/s)": 0.027889 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 277.25, "completions/mean_length": 113.19791793823242, "completions/min_length": 51.0, "epoch": 4.311243484735666, "grad_norm": 1.306787434201211, "kl": 0.28564453125, "learning_rate": 6.162282074071935e-07, "loss": -0.02962137758731842, "memory(GiB)": 112.53, "reward": 1.8958333432674408, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.17466487362980843, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2893, "train_speed(iter/s)": 0.027889 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 258.75, "completions/mean_length": 116.73958587646484, "completions/min_length": 45.25, "epoch": 4.312732688011914, "grad_norm": 0.5809697255817378, "kl": 0.278564453125, "learning_rate": 6.159981782731473e-07, "loss": 0.012433482334017754, "memory(GiB)": 112.53, "reward": 1.9375, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.11058146506547928, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2894, "train_speed(iter/s)": 0.027889 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 278.25, "completions/mean_length": 119.29167175292969, "completions/min_length": 45.0, "epoch": 4.3142218912881605, "grad_norm": 0.005714118072574225, "kl": 0.28515625, "learning_rate": 6.15768123188157e-07, "loss": 0.0002850864257197827, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2895, "train_speed(iter/s)": 0.027883 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 268.0, "completions/mean_length": 116.77083969116211, "completions/min_length": 44.75, "epoch": 4.315711094564408, "grad_norm": 0.004553932442809095, "kl": 0.27783203125, "learning_rate": 6.155380422036903e-07, "loss": 0.0002779843925964087, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2896, "train_speed(iter/s)": 0.02788 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 258.0, "completions/mean_length": 129.91666984558105, "completions/min_length": 58.75, "epoch": 4.317200297840655, "grad_norm": 1.495440101928045, "kl": 0.271728515625, "learning_rate": 6.153079353712201e-07, "loss": -0.02259860560297966, "memory(GiB)": 112.53, "reward": 1.6354166865348816, "reward_std": 0.07400632463395596, "rewards/CineAccuracyORM/mean": 0.6354166716337204, "rewards/CineAccuracyORM/std": 0.3717081770300865, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2897, "train_speed(iter/s)": 0.027877 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 301.25, "completions/mean_length": 117.35416984558105, "completions/min_length": 39.5, "epoch": 4.318689501116903, "grad_norm": 0.005239709269489649, "kl": 0.28515625, "learning_rate": 6.15077802742226e-07, "loss": 0.000285387912299484, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2898, "train_speed(iter/s)": 0.027879 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 251.25, "completions/mean_length": 108.22916984558105, "completions/min_length": 23.0, "epoch": 4.320178704393149, "grad_norm": 1.6260246429261864, "kl": 0.3037109375, "learning_rate": 6.148476443681927e-07, "loss": 0.025803549215197563, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.08625819534063339, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3320881873369217, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2899, "train_speed(iter/s)": 0.027874 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 300.25, "completions/mean_length": 124.51041984558105, "completions/min_length": 45.5, "epoch": 4.321667907669397, "grad_norm": 0.004677394816205362, "kl": 0.26025390625, "learning_rate": 6.146174603006109e-07, "loss": 0.00026052063913084567, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2900, "train_speed(iter/s)": 0.027874 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 252.0, "completions/mean_length": 116.13541984558105, "completions/min_length": 43.75, "epoch": 4.323157110945644, "grad_norm": 0.004877659871168438, "kl": 0.28466796875, "learning_rate": 6.143872505909774e-07, "loss": 0.0002843705005943775, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2901, "train_speed(iter/s)": 0.027869 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 234.25, "completions/mean_length": 114.31250190734863, "completions/min_length": 53.25, "epoch": 4.324646314221892, "grad_norm": 0.005098154186169733, "kl": 0.27783203125, "learning_rate": 6.14157015290794e-07, "loss": 0.00027793340268544853, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2902, "train_speed(iter/s)": 0.027868 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 251.0, "completions/mean_length": 110.36458587646484, "completions/min_length": 39.25, "epoch": 4.326135517498138, "grad_norm": 0.00470358809035637, "kl": 0.26513671875, "learning_rate": 6.139267544515689e-07, "loss": 0.0002644177875481546, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2903, "train_speed(iter/s)": 0.027866 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 277.5, "completions/mean_length": 118.47916984558105, "completions/min_length": 39.25, "epoch": 4.327624720774386, "grad_norm": 1.3995456643581223, "kl": 0.2822265625, "learning_rate": 6.136964681248153e-07, "loss": -0.007252431940287352, "memory(GiB)": 112.53, "reward": 1.8854166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.20484384894371033, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2904, "train_speed(iter/s)": 0.027861 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.25, "completions/mean_length": 106.64583587646484, "completions/min_length": 50.5, "epoch": 4.329113924050633, "grad_norm": 2.3737668812844146, "kl": 0.2861328125, "learning_rate": 6.134661563620529e-07, "loss": 0.004514303989708424, "memory(GiB)": 112.53, "reward": 1.5937500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500074505806, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2905, "train_speed(iter/s)": 0.02786 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 225.75, "completions/mean_length": 111.87500190734863, "completions/min_length": 38.75, "epoch": 4.33060312732688, "grad_norm": 1.3682296646917462, "kl": 0.2841796875, "learning_rate": 6.132358192148064e-07, "loss": -0.02300984039902687, "memory(GiB)": 112.53, "reward": 1.760416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166865348816, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2906, "train_speed(iter/s)": 0.027861 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 271.0, "completions/mean_length": 118.04166984558105, "completions/min_length": 51.75, "epoch": 4.332092330603127, "grad_norm": 0.004527649325824706, "kl": 0.2626953125, "learning_rate": 6.130054567346069e-07, "loss": 0.0002628913789521903, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2907, "train_speed(iter/s)": 0.02786 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 254.5, "completions/mean_length": 112.86458587646484, "completions/min_length": 27.5, "epoch": 4.333581533879374, "grad_norm": 2.4813832974816945, "kl": 0.2861328125, "learning_rate": 6.127750689729905e-07, "loss": -0.02251773700118065, "memory(GiB)": 112.53, "reward": 1.6250000298023224, "reward_std": 0.1034691073000431, "rewards/CineAccuracyORM/mean": 0.6250000111758709, "rewards/CineAccuracyORM/std": 0.3348836228251457, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2908, "train_speed(iter/s)": 0.027855 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 240.0, "completions/mean_length": 105.43750381469727, "completions/min_length": 26.75, "epoch": 4.335070737155622, "grad_norm": 0.0038089894454665482, "kl": 0.291015625, "learning_rate": 6.125446559814993e-07, "loss": 0.0002914931974373758, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2909, "train_speed(iter/s)": 0.027853 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 284.75, "completions/mean_length": 115.82291984558105, "completions/min_length": 42.75, "epoch": 4.336559940431869, "grad_norm": 1.1914152240563942, "kl": 0.27978515625, "learning_rate": 6.123142178116808e-07, "loss": -0.007019783835858107, "memory(GiB)": 112.53, "reward": 1.697916716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.697916679084301, "rewards/CineAccuracyORM/std": 0.34448449313640594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2910, "train_speed(iter/s)": 0.02785 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 237.75, "completions/mean_length": 105.45833587646484, "completions/min_length": 38.0, "epoch": 4.338049143708116, "grad_norm": 0.6268224161121926, "kl": 0.3056640625, "learning_rate": 6.120837545150885e-07, "loss": -0.0006747841252945364, "memory(GiB)": 112.53, "reward": 1.7604166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.760416679084301, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2911, "train_speed(iter/s)": 0.02785 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 320.0, "completions/mean_length": 117.86458778381348, "completions/min_length": 30.25, "epoch": 4.339538346984363, "grad_norm": 0.009774764666140412, "kl": 0.2802734375, "learning_rate": 6.118532661432811e-07, "loss": 0.00028038781601935625, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2912, "train_speed(iter/s)": 0.027847 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.0, "completions/mean_length": 101.73958587646484, "completions/min_length": 23.0, "epoch": 4.341027550260611, "grad_norm": 0.005145826721435065, "kl": 0.30419921875, "learning_rate": 6.116227527478234e-07, "loss": 0.0003035594127140939, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2913, "train_speed(iter/s)": 0.027845 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 281.25, "completions/mean_length": 147.00000762939453, "completions/min_length": 47.0, "epoch": 4.342516753536858, "grad_norm": 0.9787508291424115, "kl": 0.238525390625, "learning_rate": 6.113922143802853e-07, "loss": 0.003667989745736122, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2914, "train_speed(iter/s)": 0.027844 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 282.25, "completions/mean_length": 118.00000190734863, "completions/min_length": 31.25, "epoch": 4.344005956813105, "grad_norm": 0.004160246152965726, "kl": 0.278076171875, "learning_rate": 6.111616510922425e-07, "loss": 0.00027795322239398956, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2915, "train_speed(iter/s)": 0.027841 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.25, "completions/mean_length": 108.17708778381348, "completions/min_length": 41.25, "epoch": 4.345495160089352, "grad_norm": 0.005882241089297747, "kl": 0.291015625, "learning_rate": 6.109310629352765e-07, "loss": 0.00029162748251110315, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2916, "train_speed(iter/s)": 0.027839 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 250.75, "completions/mean_length": 113.92708778381348, "completions/min_length": 35.25, "epoch": 4.3469843633656, "grad_norm": 0.005055037293663646, "kl": 0.29736328125, "learning_rate": 6.10700449960974e-07, "loss": 0.00029733084375038743, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2917, "train_speed(iter/s)": 0.027839 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 281.25, "completions/mean_length": 122.01041793823242, "completions/min_length": 42.25, "epoch": 4.348473566641847, "grad_norm": 1.6477029529772502, "kl": 0.267578125, "learning_rate": 6.104698122209273e-07, "loss": 0.006368284113705158, "memory(GiB)": 112.53, "reward": 1.7395833432674408, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.21429424732923508, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2918, "train_speed(iter/s)": 0.02784 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 270.5, "completions/mean_length": 113.37500190734863, "completions/min_length": 36.0, "epoch": 4.349962769918093, "grad_norm": 0.7949332535060996, "kl": 0.27392578125, "learning_rate": 6.102391497667346e-07, "loss": -0.00485956622287631, "memory(GiB)": 112.53, "reward": 1.7083333730697632, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7083333432674408, "rewards/CineAccuracyORM/std": 0.36846019327640533, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2919, "train_speed(iter/s)": 0.027839 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.5, "completions/mean_length": 103.42708587646484, "completions/min_length": 33.25, "epoch": 4.351451973194341, "grad_norm": 0.004785316529501409, "kl": 0.29638671875, "learning_rate": 6.100084626499991e-07, "loss": 0.00029557175002992153, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2920, "train_speed(iter/s)": 0.02784 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.5, "completions/mean_length": 105.69791984558105, "completions/min_length": 23.75, "epoch": 4.352941176470588, "grad_norm": 0.0045541800640828915, "kl": 0.30029296875, "learning_rate": 6.097777509223299e-07, "loss": 0.00030010007321834564, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2921, "train_speed(iter/s)": 0.027837 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 299.25, "completions/mean_length": 120.06250381469727, "completions/min_length": 32.75, "epoch": 4.3544303797468356, "grad_norm": 0.0046077822261513775, "kl": 0.27392578125, "learning_rate": 6.095470146353416e-07, "loss": 0.00027346410206519067, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2922, "train_speed(iter/s)": 0.027837 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 250.0, "completions/mean_length": 118.63541793823242, "completions/min_length": 34.5, "epoch": 4.355919583023082, "grad_norm": 1.1445248382605224, "kl": 0.280029296875, "learning_rate": 6.093162538406541e-07, "loss": -0.01177732553333044, "memory(GiB)": 112.53, "reward": 1.6145834028720856, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6145833507180214, "rewards/CineAccuracyORM/std": 0.464870348572731, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2923, "train_speed(iter/s)": 0.027835 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 269.0, "completions/mean_length": 126.26042366027832, "completions/min_length": 27.25, "epoch": 4.35740878629933, "grad_norm": 0.004873464369821584, "kl": 0.2880859375, "learning_rate": 6.090854685898927e-07, "loss": 0.0002873747725971043, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2924, "train_speed(iter/s)": 0.02783 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 284.25, "completions/mean_length": 112.71875381469727, "completions/min_length": 23.25, "epoch": 4.358897989575577, "grad_norm": 0.00408428163784246, "kl": 0.280517578125, "learning_rate": 6.088546589346885e-07, "loss": 0.00028102812939323485, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2925, "train_speed(iter/s)": 0.027827 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 271.0, "completions/mean_length": 129.30208587646484, "completions/min_length": 37.25, "epoch": 4.3603871928518245, "grad_norm": 0.004131517169884871, "kl": 0.26025390625, "learning_rate": 6.086238249266781e-07, "loss": 0.00026016365154646337, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2926, "train_speed(iter/s)": 0.027824 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 272.0, "completions/mean_length": 121.63541984558105, "completions/min_length": 36.5, "epoch": 4.361876396128071, "grad_norm": 0.004200103353811542, "kl": 0.2802734375, "learning_rate": 6.083929666175031e-07, "loss": 0.00028033863054588437, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2927, "train_speed(iter/s)": 0.027824 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 267.0, "completions/mean_length": 127.87500381469727, "completions/min_length": 42.25, "epoch": 4.363365599404319, "grad_norm": 0.004719003979572711, "kl": 0.26904296875, "learning_rate": 6.081620840588108e-07, "loss": 0.0002687814412638545, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2928, "train_speed(iter/s)": 0.027821 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 225.25, "completions/mean_length": 110.03125381469727, "completions/min_length": 35.25, "epoch": 4.364854802680566, "grad_norm": 0.004391277634445335, "kl": 0.28369140625, "learning_rate": 6.079311773022539e-07, "loss": 0.00028415941051207483, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2929, "train_speed(iter/s)": 0.027818 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 263.0, "completions/mean_length": 116.61458778381348, "completions/min_length": 34.0, "epoch": 4.3663440059568135, "grad_norm": 0.0038474178356157312, "kl": 0.25341796875, "learning_rate": 6.077002463994907e-07, "loss": 0.00025322590954601765, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2930, "train_speed(iter/s)": 0.027818 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 299.5, "completions/mean_length": 125.40625381469727, "completions/min_length": 48.75, "epoch": 4.36783320923306, "grad_norm": 0.004117502244042964, "kl": 0.253662109375, "learning_rate": 6.074692914021844e-07, "loss": 0.0002529668272472918, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2931, "train_speed(iter/s)": 0.02782 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.5, "completions/mean_length": 114.84375381469727, "completions/min_length": 30.0, "epoch": 4.369322412509307, "grad_norm": 2.0187269784642883, "kl": 0.2587890625, "learning_rate": 6.072383123620042e-07, "loss": 0.012076016515493393, "memory(GiB)": 112.53, "reward": 1.6979166865348816, "reward_std": 0.11258216388523579, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.3219047859311104, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2932, "train_speed(iter/s)": 0.02782 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 290.0, "completions/mean_length": 130.5625057220459, "completions/min_length": 38.25, "epoch": 4.370811615785555, "grad_norm": 0.003946975884447033, "kl": 0.24951171875, "learning_rate": 6.070073093306245e-07, "loss": 0.0002494904329068959, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2933, "train_speed(iter/s)": 0.027818 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 297.25, "completions/mean_length": 127.76042175292969, "completions/min_length": 52.25, "epoch": 4.372300819061802, "grad_norm": 0.00391259904350869, "kl": 0.25390625, "learning_rate": 6.067762823597245e-07, "loss": 0.00025365117471665144, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2934, "train_speed(iter/s)": 0.027812 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 254.5, "completions/mean_length": 112.40625381469727, "completions/min_length": 30.75, "epoch": 4.373790022338049, "grad_norm": 0.004738105922199718, "kl": 0.2783203125, "learning_rate": 6.065452315009898e-07, "loss": 0.0002780586073640734, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2935, "train_speed(iter/s)": 0.027813 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 264.0, "completions/mean_length": 134.38541984558105, "completions/min_length": 35.25, "epoch": 4.375279225614296, "grad_norm": 0.03989199714245392, "kl": 0.254150390625, "learning_rate": 6.063141568061103e-07, "loss": 0.00025424649356864393, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2936, "train_speed(iter/s)": 0.027807 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 279.0, "completions/mean_length": 116.56250381469727, "completions/min_length": 36.75, "epoch": 4.376768428890544, "grad_norm": 1.494196341645952, "kl": 0.38671875, "learning_rate": 6.060830583267818e-07, "loss": -0.0158963892608881, "memory(GiB)": 112.53, "reward": 1.84375, "reward_std": 0.06803862750530243, "rewards/CineAccuracyORM/mean": 0.84375, "rewards/CineAccuracyORM/std": 0.12363383919000626, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2937, "train_speed(iter/s)": 0.027802 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 258.0, "completions/mean_length": 118.58333778381348, "completions/min_length": 39.75, "epoch": 4.3782576321667905, "grad_norm": 0.004440423446475714, "kl": 0.254150390625, "learning_rate": 6.058519361147054e-07, "loss": 0.0002535810926929116, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2938, "train_speed(iter/s)": 0.027802 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 231.25, "completions/mean_length": 118.84375190734863, "completions/min_length": 30.75, "epoch": 4.379746835443038, "grad_norm": 0.8223788984156435, "kl": 0.279296875, "learning_rate": 6.056207902215873e-07, "loss": -0.007824220694601536, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833507180214, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2939, "train_speed(iter/s)": 0.027802 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 280.5, "completions/mean_length": 123.37500190734863, "completions/min_length": 28.75, "epoch": 4.381236038719285, "grad_norm": 0.0035981569918690326, "kl": 0.239990234375, "learning_rate": 6.053896206991393e-07, "loss": 0.00024020222190301865, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2940, "train_speed(iter/s)": 0.027799 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 315.5, "completions/mean_length": 132.78125190734863, "completions/min_length": 43.75, "epoch": 4.382725241995533, "grad_norm": 0.003739088305440338, "kl": 0.246826171875, "learning_rate": 6.051584275990785e-07, "loss": 0.00024662562645971775, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2941, "train_speed(iter/s)": 0.027796 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 284.5, "completions/mean_length": 133.1041717529297, "completions/min_length": 43.0, "epoch": 4.3842144452717795, "grad_norm": 0.0038439858883807, "kl": 0.242919921875, "learning_rate": 6.049272109731264e-07, "loss": 0.00024248502450063825, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2942, "train_speed(iter/s)": 0.027793 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 282.25, "completions/mean_length": 135.21875381469727, "completions/min_length": 37.75, "epoch": 4.385703648548027, "grad_norm": 1.1210058511278111, "kl": 0.262451171875, "learning_rate": 6.04695970873011e-07, "loss": 0.020306676626205444, "memory(GiB)": 112.53, "reward": 1.9479166865348816, "reward_std": 0.07400631904602051, "rewards/CineAccuracyORM/mean": 0.9479166716337204, "rewards/CineAccuracyORM/std": 0.1037127822637558, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2943, "train_speed(iter/s)": 0.02779 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 348.5, "completions/mean_length": 140.56250381469727, "completions/min_length": 29.75, "epoch": 4.387192851824274, "grad_norm": 0.005196730408755538, "kl": 0.2568359375, "learning_rate": 6.044647073504649e-07, "loss": 0.0002568516065366566, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2944, "train_speed(iter/s)": 0.027785 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 289.75, "completions/mean_length": 133.08333778381348, "completions/min_length": 54.25, "epoch": 4.388682055100521, "grad_norm": 0.011809463698088333, "kl": 0.245361328125, "learning_rate": 6.04233420457226e-07, "loss": 0.00024514447432011366, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2945, "train_speed(iter/s)": 0.027784 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 240.25, "completions/mean_length": 103.06250190734863, "completions/min_length": 24.75, "epoch": 4.3901712583767685, "grad_norm": 1.5235116496072456, "kl": 0.275634765625, "learning_rate": 6.040021102450375e-07, "loss": 0.014527074061334133, "memory(GiB)": 112.53, "reward": 1.697916716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.697916679084301, "rewards/CineAccuracyORM/std": 0.34448449313640594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2946, "train_speed(iter/s)": 0.027786 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 309.25, "completions/mean_length": 126.93750381469727, "completions/min_length": 21.0, "epoch": 4.391660461653015, "grad_norm": 1.0754975818787795, "kl": 0.275390625, "learning_rate": 6.037707767656478e-07, "loss": 0.0038968033622950315, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2947, "train_speed(iter/s)": 0.027783 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 240.5, "completions/mean_length": 114.86458778381348, "completions/min_length": 40.5, "epoch": 4.393149664929263, "grad_norm": 1.4456841255981727, "kl": 0.2587890625, "learning_rate": 6.035394200708103e-07, "loss": -0.0038342508487403393, "memory(GiB)": 112.53, "reward": 1.541666716337204, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.541666679084301, "rewards/CineAccuracyORM/std": 0.4888460487127304, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2948, "train_speed(iter/s)": 0.027786 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 295.75, "completions/mean_length": 118.72916984558105, "completions/min_length": 39.25, "epoch": 4.39463886820551, "grad_norm": 0.006782698506620622, "kl": 0.266357421875, "learning_rate": 6.033080402122841e-07, "loss": 0.0002661064499989152, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2949, "train_speed(iter/s)": 0.027786 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.0, "completions/mean_length": 116.42708778381348, "completions/min_length": 41.25, "epoch": 4.396128071481757, "grad_norm": 0.004183854929746054, "kl": 0.287353515625, "learning_rate": 6.030766372418329e-07, "loss": 0.00028725049924105406, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2950, "train_speed(iter/s)": 0.027784 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 260.25, "completions/mean_length": 109.95833778381348, "completions/min_length": 22.0, "epoch": 4.397617274758004, "grad_norm": 0.004383046859639457, "kl": 0.283935546875, "learning_rate": 6.028452112112258e-07, "loss": 0.00028412483516149223, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2951, "train_speed(iter/s)": 0.027784 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 277.75, "completions/mean_length": 123.95833778381348, "completions/min_length": 46.0, "epoch": 4.399106478034252, "grad_norm": 0.005156232628297652, "kl": 0.27978515625, "learning_rate": 6.026137621722376e-07, "loss": 0.00027964258333668113, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2952, "train_speed(iter/s)": 0.027783 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 263.5, "completions/mean_length": 126.81250381469727, "completions/min_length": 28.75, "epoch": 4.400595681310499, "grad_norm": 3.0981415245538817, "kl": 0.258056640625, "learning_rate": 6.02382290176647e-07, "loss": -0.005950912367552519, "memory(GiB)": 112.53, "reward": 1.635416716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.635416679084301, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2953, "train_speed(iter/s)": 0.027783 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 277.75, "completions/mean_length": 125.07291984558105, "completions/min_length": 34.25, "epoch": 4.402084884586746, "grad_norm": 0.004999444227333435, "kl": 0.25634765625, "learning_rate": 6.021507952762391e-07, "loss": 0.00025591079611331224, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2954, "train_speed(iter/s)": 0.027783 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 294.5, "completions/mean_length": 130.8541717529297, "completions/min_length": 35.0, "epoch": 4.403574087862993, "grad_norm": 0.6928266357675398, "kl": 0.250244140625, "learning_rate": 6.019192775228036e-07, "loss": -0.012850817292928696, "memory(GiB)": 112.53, "reward": 1.7083333730697632, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7083333507180214, "rewards/CineAccuracyORM/std": 0.33594508469104767, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2955, "train_speed(iter/s)": 0.027782 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 305.75, "completions/mean_length": 121.80208587646484, "completions/min_length": 31.75, "epoch": 4.405063291139241, "grad_norm": 0.004900297699888942, "kl": 0.291015625, "learning_rate": 6.016877369681349e-07, "loss": 0.00029052860918454826, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2956, "train_speed(iter/s)": 0.027776 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 360.75, "completions/mean_length": 132.1458339691162, "completions/min_length": 21.0, "epoch": 4.406552494415488, "grad_norm": 0.004074371336341307, "kl": 0.25927734375, "learning_rate": 6.014561736640333e-07, "loss": 0.00025932880816981196, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2957, "train_speed(iter/s)": 0.027775 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 280.25, "completions/mean_length": 135.4062557220459, "completions/min_length": 27.5, "epoch": 4.4080416976917345, "grad_norm": 0.0038833798287287017, "kl": 0.24560546875, "learning_rate": 6.012245876623037e-07, "loss": 0.0002456308575347066, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2958, "train_speed(iter/s)": 0.027775 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 299.5, "completions/mean_length": 133.4270839691162, "completions/min_length": 33.5, "epoch": 4.409530900967982, "grad_norm": 0.9561557522125017, "kl": 0.26318359375, "learning_rate": 6.009929790147564e-07, "loss": 0.00539008341729641, "memory(GiB)": 112.53, "reward": 1.5937500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500149011612, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2959, "train_speed(iter/s)": 0.027774 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 318.75, "completions/mean_length": 133.0416717529297, "completions/min_length": 39.0, "epoch": 4.411020104244229, "grad_norm": 0.022923263974431445, "kl": 0.263427734375, "learning_rate": 6.007613477732061e-07, "loss": 0.0002639133599586785, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2960, "train_speed(iter/s)": 0.027771 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 393.25, "completions/mean_length": 152.6458396911621, "completions/min_length": 40.0, "epoch": 4.412509307520477, "grad_norm": 0.6572303546608245, "kl": 0.2314453125, "learning_rate": 6.005296939894733e-07, "loss": 0.01580938510596752, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2961, "train_speed(iter/s)": 0.027768 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 367.0, "completions/mean_length": 137.92708587646484, "completions/min_length": 38.5, "epoch": 4.4139985107967235, "grad_norm": 0.004417415122897272, "kl": 0.260009765625, "learning_rate": 6.002980177153831e-07, "loss": 0.0002602719177957624, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2962, "train_speed(iter/s)": 0.027763 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 323.25, "completions/mean_length": 134.9479217529297, "completions/min_length": 52.0, "epoch": 4.415487714072971, "grad_norm": 1.0365118048482334, "kl": 0.251953125, "learning_rate": 6.000663190027658e-07, "loss": -0.004040132276713848, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.07715167850255966, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3568698540329933, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2963, "train_speed(iter/s)": 0.027764 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 294.75, "completions/mean_length": 145.51041793823242, "completions/min_length": 46.5, "epoch": 4.416976917349218, "grad_norm": 0.8691545145125118, "kl": 0.231689453125, "learning_rate": 5.998345979034569e-07, "loss": -0.004209444392472506, "memory(GiB)": 112.53, "reward": 1.8750000298023224, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.8750000149011612, "rewards/CineAccuracyORM/std": 0.2155592292547226, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2964, "train_speed(iter/s)": 0.027764 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 415.5, "completions/mean_length": 139.13541793823242, "completions/min_length": 40.25, "epoch": 4.418466120625466, "grad_norm": 1.0078518798886262, "kl": 0.249267578125, "learning_rate": 5.996028544692966e-07, "loss": 0.006625776644796133, "memory(GiB)": 112.53, "reward": 1.5937500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500074505806, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2965, "train_speed(iter/s)": 0.02776 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 326.0, "completions/mean_length": 144.1666717529297, "completions/min_length": 41.75, "epoch": 4.419955323901712, "grad_norm": 0.0036584430160668626, "kl": 0.2431640625, "learning_rate": 5.993710887521302e-07, "loss": 0.0002430095337331295, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2966, "train_speed(iter/s)": 0.027758 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 279.0, "completions/mean_length": 140.38541984558105, "completions/min_length": 50.0, "epoch": 4.42144452717796, "grad_norm": 1.3406562847380137, "kl": 0.248046875, "learning_rate": 5.99139300803808e-07, "loss": -0.016288941726088524, "memory(GiB)": 112.53, "reward": 1.8437500298023224, "reward_std": 0.0876726396381855, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.30001722276210785, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2967, "train_speed(iter/s)": 0.027751 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 358.25, "completions/mean_length": 148.12500762939453, "completions/min_length": 58.0, "epoch": 4.422933730454207, "grad_norm": 0.004083774112190291, "kl": 0.2470703125, "learning_rate": 5.98907490676185e-07, "loss": 0.0002465292054694146, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2968, "train_speed(iter/s)": 0.027746 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 263.25, "completions/mean_length": 120.43750190734863, "completions/min_length": 35.25, "epoch": 4.424422933730455, "grad_norm": 0.003836699015429941, "kl": 0.25830078125, "learning_rate": 5.986756584211217e-07, "loss": 0.00025801057927310467, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2969, "train_speed(iter/s)": 0.027745 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 311.75, "completions/mean_length": 137.80208778381348, "completions/min_length": 41.75, "epoch": 4.425912137006701, "grad_norm": 0.0037447906110710497, "kl": 0.264404296875, "learning_rate": 5.984438040904834e-07, "loss": 0.000263850437477231, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2970, "train_speed(iter/s)": 0.027745 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 308.0, "completions/mean_length": 146.77083587646484, "completions/min_length": 29.75, "epoch": 4.427401340282948, "grad_norm": 0.005804301714029399, "kl": 0.2587890625, "learning_rate": 5.982119277361399e-07, "loss": 0.00025919711333699524, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2971, "train_speed(iter/s)": 0.027744 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 391.5, "completions/mean_length": 153.65625190734863, "completions/min_length": 35.0, "epoch": 4.428890543559196, "grad_norm": 1.0424824738501803, "kl": 0.231689453125, "learning_rate": 5.979800294099665e-07, "loss": -0.01862872764468193, "memory(GiB)": 112.53, "reward": 1.5416667461395264, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.5416666828095913, "rewards/CineAccuracyORM/std": 0.45633094012737274, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2972, "train_speed(iter/s)": 0.027743 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 360.25, "completions/mean_length": 146.9375057220459, "completions/min_length": 47.25, "epoch": 4.430379746835443, "grad_norm": 1.141481798648694, "kl": 0.252685546875, "learning_rate": 5.97748109163843e-07, "loss": 0.0067833466455340385, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2973, "train_speed(iter/s)": 0.027739 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 369.0, "completions/mean_length": 168.7395896911621, "completions/min_length": 43.0, "epoch": 4.43186895011169, "grad_norm": 0.9253533928749761, "kl": 0.23486328125, "learning_rate": 5.975161670496542e-07, "loss": 0.022251226007938385, "memory(GiB)": 112.53, "reward": 1.8125000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2974, "train_speed(iter/s)": 0.027738 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 317.0, "completions/mean_length": 136.64583587646484, "completions/min_length": 24.75, "epoch": 4.433358153387937, "grad_norm": 0.004054977569702912, "kl": 0.247802734375, "learning_rate": 5.9728420311929e-07, "loss": 0.0002473960630595684, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2975, "train_speed(iter/s)": 0.027737 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 314.5, "completions/mean_length": 148.50000381469727, "completions/min_length": 29.25, "epoch": 4.434847356664185, "grad_norm": 0.004655912760756666, "kl": 0.238525390625, "learning_rate": 5.970522174246452e-07, "loss": 0.00023846345720812678, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2976, "train_speed(iter/s)": 0.027737 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 344.75, "completions/mean_length": 149.82292556762695, "completions/min_length": 50.25, "epoch": 4.436336559940432, "grad_norm": 0.003946824192402886, "kl": 0.221435546875, "learning_rate": 5.968202100176188e-07, "loss": 0.00022153118334244937, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2977, "train_speed(iter/s)": 0.027733 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 313.25, "completions/mean_length": 139.7604217529297, "completions/min_length": 52.75, "epoch": 4.437825763216679, "grad_norm": 1.5532938998368508, "kl": 0.2451171875, "learning_rate": 5.965881809501157e-07, "loss": 0.016960332170128822, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.3477324768900871, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2978, "train_speed(iter/s)": 0.027734 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 302.25, "completions/mean_length": 132.58333778381348, "completions/min_length": 27.75, "epoch": 4.439314966492926, "grad_norm": 1.3917644998042369, "kl": 0.26611328125, "learning_rate": 5.963561302740448e-07, "loss": 0.01743663102388382, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2979, "train_speed(iter/s)": 0.027733 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 287.25, "completions/mean_length": 140.9895896911621, "completions/min_length": 38.25, "epoch": 4.440804169769174, "grad_norm": 1.6519026188533412, "kl": 0.26123046875, "learning_rate": 5.961240580413203e-07, "loss": -0.010159874334931374, "memory(GiB)": 112.53, "reward": 1.7291666865348816, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.729166679084301, "rewards/CineAccuracyORM/std": 0.2950507290661335, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2980, "train_speed(iter/s)": 0.027735 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 351.0, "completions/mean_length": 144.32291984558105, "completions/min_length": 33.5, "epoch": 4.442293373045421, "grad_norm": 0.46322644693857296, "kl": 0.24755859375, "learning_rate": 5.958919643038608e-07, "loss": 0.014746243134140968, "memory(GiB)": 112.53, "reward": 1.7187500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500074505806, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2981, "train_speed(iter/s)": 0.027736 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 312.5, "completions/mean_length": 146.22917556762695, "completions/min_length": 42.0, "epoch": 4.443782576321668, "grad_norm": 0.004699599648154909, "kl": 0.256103515625, "learning_rate": 5.956598491135901e-07, "loss": 0.00025622249813750386, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2982, "train_speed(iter/s)": 0.027736 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 302.5, "completions/mean_length": 136.5520896911621, "completions/min_length": 47.75, "epoch": 4.445271779597915, "grad_norm": 0.003994284029566585, "kl": 0.2392578125, "learning_rate": 5.954277125224369e-07, "loss": 0.0002389051951467991, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2983, "train_speed(iter/s)": 0.027735 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 270.75, "completions/mean_length": 140.57292366027832, "completions/min_length": 29.0, "epoch": 4.446760982874162, "grad_norm": 0.004510337062277266, "kl": 0.24365234375, "learning_rate": 5.951955545823342e-07, "loss": 0.00024349939485546201, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2984, "train_speed(iter/s)": 0.027737 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 280.75, "completions/mean_length": 145.5416717529297, "completions/min_length": 27.25, "epoch": 4.44825018615041, "grad_norm": 0.00515188624346104, "kl": 0.236572265625, "learning_rate": 5.949633753452201e-07, "loss": 0.0002371769369347021, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2985, "train_speed(iter/s)": 0.027737 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 413.5, "completions/mean_length": 144.9791717529297, "completions/min_length": 31.75, "epoch": 4.449739389426656, "grad_norm": 0.6095272596397838, "kl": 0.2529296875, "learning_rate": 5.947311748630374e-07, "loss": -0.014212936162948608, "memory(GiB)": 112.53, "reward": 1.7291667461395264, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7291666865348816, "rewards/CineAccuracyORM/std": 0.43174003064632416, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2986, "train_speed(iter/s)": 0.027732 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 320.5, "completions/mean_length": 137.4687557220459, "completions/min_length": 41.75, "epoch": 4.451228592702904, "grad_norm": 0.004377035328978902, "kl": 0.2568359375, "learning_rate": 5.944989531877337e-07, "loss": 0.0002569106873124838, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2987, "train_speed(iter/s)": 0.02773 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 341.0, "completions/mean_length": 133.26041793823242, "completions/min_length": 34.25, "epoch": 4.452717795979151, "grad_norm": 0.08955353309617914, "kl": 0.39794921875, "learning_rate": 5.942667103712612e-07, "loss": 0.00039843187551014125, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2988, "train_speed(iter/s)": 0.027729 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 285.0, "completions/mean_length": 136.91666793823242, "completions/min_length": 35.5, "epoch": 4.4542069992553985, "grad_norm": 1.012921212587223, "kl": 0.2529296875, "learning_rate": 5.940344464655771e-07, "loss": -0.012766260653734207, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.08625819534063339, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3748745322227478, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2989, "train_speed(iter/s)": 0.027729 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 334.75, "completions/mean_length": 143.9583396911621, "completions/min_length": 38.75, "epoch": 4.455696202531645, "grad_norm": 0.9073842071687069, "kl": 0.234619140625, "learning_rate": 5.938021615226431e-07, "loss": 0.01661410741508007, "memory(GiB)": 112.53, "reward": 1.6979166865348816, "reward_std": 0.07400632463395596, "rewards/CineAccuracyORM/mean": 0.697916679084301, "rewards/CineAccuracyORM/std": 0.29910537227988243, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2990, "train_speed(iter/s)": 0.027724 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 306.5, "completions/mean_length": 127.5000057220459, "completions/min_length": 26.0, "epoch": 4.457185405807893, "grad_norm": 0.003827785705090066, "kl": 0.27783203125, "learning_rate": 5.935698555944255e-07, "loss": 0.0002779067144729197, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2991, "train_speed(iter/s)": 0.027718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 243.25, "completions/mean_length": 124.72916984558105, "completions/min_length": 35.0, "epoch": 4.45867460908414, "grad_norm": 0.003939374665568281, "kl": 0.27197265625, "learning_rate": 5.933375287328954e-07, "loss": 0.00027237628819420934, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2992, "train_speed(iter/s)": 0.027721 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 255.0, "completions/mean_length": 123.1250057220459, "completions/min_length": 37.75, "epoch": 4.4601638123603875, "grad_norm": 0.02271230459063555, "kl": 0.275390625, "learning_rate": 5.93105180990029e-07, "loss": 0.0002752154250629246, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2993, "train_speed(iter/s)": 0.027723 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 285.0, "completions/mean_length": 128.22916984558105, "completions/min_length": 29.5, "epoch": 4.461653015636634, "grad_norm": 0.928973956826057, "kl": 0.263427734375, "learning_rate": 5.928728124178064e-07, "loss": 0.0026739072054624557, "memory(GiB)": 112.53, "reward": 1.8125000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2994, "train_speed(iter/s)": 0.02772 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 332.25, "completions/mean_length": 122.70833587646484, "completions/min_length": 37.25, "epoch": 4.463142218912882, "grad_norm": 0.004495155442499572, "kl": 0.248046875, "learning_rate": 5.92640423068213e-07, "loss": 0.00024840852711349726, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2995, "train_speed(iter/s)": 0.027721 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 294.25, "completions/mean_length": 124.64583587646484, "completions/min_length": 47.25, "epoch": 4.464631422189129, "grad_norm": 1.0173918037351586, "kl": 0.270751953125, "learning_rate": 5.924080129932385e-07, "loss": -0.012313666753470898, "memory(GiB)": 112.53, "reward": 1.7291666865348816, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.729166679084301, "rewards/CineAccuracyORM/std": 0.2950507290661335, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2996, "train_speed(iter/s)": 0.027721 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 264.25, "completions/mean_length": 140.5937557220459, "completions/min_length": 49.5, "epoch": 4.466120625465376, "grad_norm": 0.0040417926491657895, "kl": 0.247314453125, "learning_rate": 5.921755822448773e-07, "loss": 0.0002472959167789668, "memory(GiB)": 112.53, "reward": 1.5833333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2997, "train_speed(iter/s)": 0.027721 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 240.5, "completions/mean_length": 120.89583587646484, "completions/min_length": 31.25, "epoch": 4.467609828741623, "grad_norm": 0.004953050849429882, "kl": 0.27001953125, "learning_rate": 5.919431308751287e-07, "loss": 0.00027012344799004495, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2998, "train_speed(iter/s)": 0.027719 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 258.5, "completions/mean_length": 116.34375381469727, "completions/min_length": 33.25, "epoch": 4.46909903201787, "grad_norm": 2.319400252630602, "kl": 0.28369140625, "learning_rate": 5.917106589359962e-07, "loss": 0.006377221085131168, "memory(GiB)": 112.53, "reward": 1.8125000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000074505806, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 2999, "train_speed(iter/s)": 0.027717 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 310.75, "completions/mean_length": 123.17708778381348, "completions/min_length": 28.5, "epoch": 4.470588235294118, "grad_norm": 0.004497341557766312, "kl": 0.27392578125, "learning_rate": 5.914781664794881e-07, "loss": 0.0002743719087447971, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3000, "train_speed(iter/s)": 0.027718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 337.25, "completions/mean_length": 131.61458778381348, "completions/min_length": 50.5, "epoch": 4.4720774385703645, "grad_norm": 0.004513413493318947, "kl": 0.260009765625, "learning_rate": 5.91245653557617e-07, "loss": 0.0002601587038952857, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3001, "train_speed(iter/s)": 0.027705 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 278.5, "completions/mean_length": 120.06250381469727, "completions/min_length": 26.0, "epoch": 4.473566641846612, "grad_norm": 0.004271044081178072, "kl": 0.28271484375, "learning_rate": 5.91013120222401e-07, "loss": 0.0002826560230460018, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3002, "train_speed(iter/s)": 0.027705 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 284.5, "completions/mean_length": 125.72917175292969, "completions/min_length": 39.25, "epoch": 4.475055845122859, "grad_norm": 0.0050277902054447375, "kl": 0.2626953125, "learning_rate": 5.907805665258617e-07, "loss": 0.00026207385235466063, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3003, "train_speed(iter/s)": 0.027703 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 307.75, "completions/mean_length": 133.67708587646484, "completions/min_length": 49.5, "epoch": 4.476545048399107, "grad_norm": 0.005188524522632612, "kl": 0.26123046875, "learning_rate": 5.905479925200257e-07, "loss": 0.00026088650338351727, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3004, "train_speed(iter/s)": 0.027698 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 271.0, "completions/mean_length": 116.06250190734863, "completions/min_length": 44.5, "epoch": 4.4780342516753535, "grad_norm": 1.6929959638218988, "kl": 0.27587890625, "learning_rate": 5.903153982569242e-07, "loss": 0.023868408054113388, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.7500000298023224, "rewards/CineAccuracyORM/std": 0.40787915512919426, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3005, "train_speed(iter/s)": 0.027701 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 281.0, "completions/mean_length": 125.10417175292969, "completions/min_length": 40.75, "epoch": 4.479523454951601, "grad_norm": 0.004805284735128682, "kl": 0.260498046875, "learning_rate": 5.900827837885927e-07, "loss": 0.00026041330420412123, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3006, "train_speed(iter/s)": 0.0277 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 275.25, "completions/mean_length": 128.4791717529297, "completions/min_length": 30.0, "epoch": 4.481012658227848, "grad_norm": 0.0052937404266933255, "kl": 0.257080078125, "learning_rate": 5.898501491670715e-07, "loss": 0.0002572079247329384, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3007, "train_speed(iter/s)": 0.0277 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 238.0, "completions/mean_length": 115.56250381469727, "completions/min_length": 42.0, "epoch": 4.482501861504096, "grad_norm": 1.0003310151303622, "kl": 0.28271484375, "learning_rate": 5.896174944444054e-07, "loss": 0.0082057761028409, "memory(GiB)": 112.53, "reward": 1.791666716337204, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7916666865348816, "rewards/CineAccuracyORM/std": 0.33594508469104767, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3008, "train_speed(iter/s)": 0.0277 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 262.25, "completions/mean_length": 136.53125381469727, "completions/min_length": 50.0, "epoch": 4.4839910647803425, "grad_norm": 0.43789352479000254, "kl": 0.255859375, "learning_rate": 5.893848196726434e-07, "loss": -0.022567911073565483, "memory(GiB)": 112.53, "reward": 1.6562500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500149011612, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3009, "train_speed(iter/s)": 0.027697 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 311.0, "completions/mean_length": 128.15625762939453, "completions/min_length": 59.0, "epoch": 4.485480268056589, "grad_norm": 0.004258245560661013, "kl": 0.260009765625, "learning_rate": 5.891521249038392e-07, "loss": 0.00026035585324279964, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3010, "train_speed(iter/s)": 0.027697 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 278.75, "completions/mean_length": 123.66667366027832, "completions/min_length": 38.5, "epoch": 4.486969471332837, "grad_norm": 0.6064298747086735, "kl": 0.261474609375, "learning_rate": 5.889194101900509e-07, "loss": 0.0008146528853103518, "memory(GiB)": 112.53, "reward": 1.6145834028720856, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6145833507180214, "rewards/CineAccuracyORM/std": 0.464870348572731, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3011, "train_speed(iter/s)": 0.027696 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 248.25, "completions/mean_length": 118.50000190734863, "completions/min_length": 46.5, "epoch": 4.488458674609084, "grad_norm": 1.847863635019892, "kl": 0.27587890625, "learning_rate": 5.88686675583341e-07, "loss": -0.014623463153839111, "memory(GiB)": 112.53, "reward": 1.510416716337204, "reward_std": 0.10661446116864681, "rewards/CineAccuracyORM/mean": 0.5104166865348816, "rewards/CineAccuracyORM/std": 0.49582476168870926, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3012, "train_speed(iter/s)": 0.027699 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 272.25, "completions/mean_length": 120.89583778381348, "completions/min_length": 55.25, "epoch": 4.4899478778853315, "grad_norm": 0.00413892438262052, "kl": 0.265625, "learning_rate": 5.884539211357768e-07, "loss": 0.0002657214063219726, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3013, "train_speed(iter/s)": 0.027701 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 294.25, "completions/mean_length": 139.4791717529297, "completions/min_length": 44.25, "epoch": 4.491437081161578, "grad_norm": 0.004633892107622817, "kl": 0.253173828125, "learning_rate": 5.882211468994299e-07, "loss": 0.00025319060659967363, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3014, "train_speed(iter/s)": 0.027698 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 258.5, "completions/mean_length": 130.37500381469727, "completions/min_length": 54.75, "epoch": 4.492926284437826, "grad_norm": 0.568220752341399, "kl": 0.258056640625, "learning_rate": 5.879883529263756e-07, "loss": -0.0016224246937781572, "memory(GiB)": 112.53, "reward": 1.5729166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5729166716337204, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3015, "train_speed(iter/s)": 0.027699 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 266.25, "completions/mean_length": 124.52083969116211, "completions/min_length": 43.5, "epoch": 4.494415487714073, "grad_norm": 0.0042176565018183605, "kl": 0.25048828125, "learning_rate": 5.877555392686948e-07, "loss": 0.0002500566188246012, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3016, "train_speed(iter/s)": 0.027701 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 267.0, "completions/mean_length": 126.89583778381348, "completions/min_length": 43.0, "epoch": 4.49590469099032, "grad_norm": 0.005459146662825245, "kl": 0.248779296875, "learning_rate": 5.87522705978472e-07, "loss": 0.00024916103575378656, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3017, "train_speed(iter/s)": 0.027701 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 264.75, "completions/mean_length": 124.64583396911621, "completions/min_length": 54.0, "epoch": 4.497393894266567, "grad_norm": 0.0045165199690155805, "kl": 0.258056640625, "learning_rate": 5.872898531077965e-07, "loss": 0.00025803354219533503, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3018, "train_speed(iter/s)": 0.027703 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 282.0, "completions/mean_length": 133.87500381469727, "completions/min_length": 51.0, "epoch": 4.498883097542815, "grad_norm": 0.0042831536202077945, "kl": 0.260009765625, "learning_rate": 5.870569807087616e-07, "loss": 0.0002600299776531756, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3019, "train_speed(iter/s)": 0.027703 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 279.75, "completions/mean_length": 120.65625381469727, "completions/min_length": 45.0, "epoch": 4.500372300819062, "grad_norm": 0.00402496358315411, "kl": 0.2705078125, "learning_rate": 5.868240888334652e-07, "loss": 0.00027015991508960724, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3020, "train_speed(iter/s)": 0.027702 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 251.75, "completions/mean_length": 127.90625381469727, "completions/min_length": 55.75, "epoch": 4.501861504095309, "grad_norm": 0.004087996346987655, "kl": 0.260498046875, "learning_rate": 5.865911775340096e-07, "loss": 0.0002603998873382807, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3021, "train_speed(iter/s)": 0.027701 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 260.0, "completions/mean_length": 116.07291793823242, "completions/min_length": 37.0, "epoch": 4.503350707371556, "grad_norm": 0.004780790514423436, "kl": 0.287109375, "learning_rate": 5.863582468625013e-07, "loss": 0.00028687785379588604, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3022, "train_speed(iter/s)": 0.027701 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 254.75, "completions/mean_length": 115.65625190734863, "completions/min_length": 52.0, "epoch": 4.504839910647803, "grad_norm": 0.0039354763077322705, "kl": 0.267822265625, "learning_rate": 5.861252968710515e-07, "loss": 0.0002674517163541168, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3023, "train_speed(iter/s)": 0.027703 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 284.75, "completions/mean_length": 118.96875381469727, "completions/min_length": 40.0, "epoch": 4.506329113924051, "grad_norm": 0.02465686095336363, "kl": 0.267333984375, "learning_rate": 5.858923276117751e-07, "loss": 0.0002668577362783253, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3024, "train_speed(iter/s)": 0.027698 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 289.25, "completions/mean_length": 128.20833778381348, "completions/min_length": 44.25, "epoch": 4.5078183172002975, "grad_norm": 0.004149275986462373, "kl": 0.2607421875, "learning_rate": 5.856593391367917e-07, "loss": 0.00026042433455586433, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3025, "train_speed(iter/s)": 0.027697 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 349.25, "completions/mean_length": 144.90625381469727, "completions/min_length": 40.75, "epoch": 4.509307520476545, "grad_norm": 0.8583554106205523, "kl": 0.2490234375, "learning_rate": 5.854263314982252e-07, "loss": 0.02645421028137207, "memory(GiB)": 112.53, "reward": 1.7187500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500074505806, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3026, "train_speed(iter/s)": 0.027689 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 301.75, "completions/mean_length": 130.59375190734863, "completions/min_length": 42.5, "epoch": 4.510796723752792, "grad_norm": 0.6280585362365154, "kl": 0.338134765625, "learning_rate": 5.851933047482039e-07, "loss": 0.014086315408349037, "memory(GiB)": 112.53, "reward": 1.8854166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.12724433839321136, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3027, "train_speed(iter/s)": 0.027683 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 275.75, "completions/mean_length": 124.30208778381348, "completions/min_length": 50.0, "epoch": 4.51228592702904, "grad_norm": 0.003622460249781177, "kl": 0.23974609375, "learning_rate": 5.8496025893886e-07, "loss": 0.0002391461021034047, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3028, "train_speed(iter/s)": 0.027683 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 254.5, "completions/mean_length": 114.57291984558105, "completions/min_length": 39.75, "epoch": 4.513775130305286, "grad_norm": 1.3957583369880993, "kl": 0.2919921875, "learning_rate": 5.8472719412233e-07, "loss": -0.0013507818803191185, "memory(GiB)": 112.53, "reward": 1.6041667461395264, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6041666865348816, "rewards/CineAccuracyORM/std": 0.4870600998401642, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3029, "train_speed(iter/s)": 0.027678 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 301.0, "completions/mean_length": 127.20833587646484, "completions/min_length": 49.0, "epoch": 4.515264333581534, "grad_norm": 0.004263320731600468, "kl": 0.271484375, "learning_rate": 5.844941103507553e-07, "loss": 0.00027171469992026687, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3030, "train_speed(iter/s)": 0.027676 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 233.0, "completions/mean_length": 127.92708778381348, "completions/min_length": 43.75, "epoch": 4.516753536857781, "grad_norm": 0.004025010330372342, "kl": 0.251220703125, "learning_rate": 5.842610076762806e-07, "loss": 0.00025113127776421607, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3031, "train_speed(iter/s)": 0.027674 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 268.25, "completions/mean_length": 123.09375381469727, "completions/min_length": 42.0, "epoch": 4.518242740134029, "grad_norm": 1.3876678924446395, "kl": 0.2734375, "learning_rate": 5.840278861510555e-07, "loss": -0.00023951553157530725, "memory(GiB)": 112.53, "reward": 1.7604166865348816, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.3546011596918106, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3032, "train_speed(iter/s)": 0.027671 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 330.0, "completions/mean_length": 123.0625057220459, "completions/min_length": 41.25, "epoch": 4.519731943410275, "grad_norm": 0.004059237789010241, "kl": 0.2783203125, "learning_rate": 5.837947458272337e-07, "loss": 0.00027845764998346567, "memory(GiB)": 112.53, "reward": 1.5000000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3033, "train_speed(iter/s)": 0.027671 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 278.75, "completions/mean_length": 126.28125381469727, "completions/min_length": 43.0, "epoch": 4.521221146686523, "grad_norm": 0.005121974786143704, "kl": 0.243896484375, "learning_rate": 5.835615867569729e-07, "loss": 0.00024375118664465845, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3034, "train_speed(iter/s)": 0.02767 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 229.25, "completions/mean_length": 108.29166793823242, "completions/min_length": 42.0, "epoch": 4.52271034996277, "grad_norm": 0.8495374165350263, "kl": 0.29541015625, "learning_rate": 5.83328408992435e-07, "loss": -0.008945098146796227, "memory(GiB)": 112.53, "reward": 1.9375, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.11058146506547928, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3035, "train_speed(iter/s)": 0.027668 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 309.0, "completions/mean_length": 116.22916793823242, "completions/min_length": 32.25, "epoch": 4.524199553239017, "grad_norm": 0.004797472162554586, "kl": 0.2890625, "learning_rate": 5.830952125857865e-07, "loss": 0.00028843811014667153, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3036, "train_speed(iter/s)": 0.027669 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 236.0, "completions/mean_length": 114.07292175292969, "completions/min_length": 43.25, "epoch": 4.525688756515264, "grad_norm": 0.392332236477356, "kl": 0.306640625, "learning_rate": 5.828619975891975e-07, "loss": 0.013504821807146072, "memory(GiB)": 112.53, "reward": 1.9270833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9270833432674408, "rewards/CineAccuracyORM/std": 0.11607640981674194, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3037, "train_speed(iter/s)": 0.02767 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.25, "completions/mean_length": 104.86458778381348, "completions/min_length": 46.75, "epoch": 4.527177959791511, "grad_norm": 0.00445297774270099, "kl": 0.29638671875, "learning_rate": 5.826287640548424e-07, "loss": 0.000296139856800437, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3038, "train_speed(iter/s)": 0.02767 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 297.75, "completions/mean_length": 131.65625381469727, "completions/min_length": 47.75, "epoch": 4.528667163067759, "grad_norm": 1.2028302003843314, "kl": 0.26220703125, "learning_rate": 5.823955120349004e-07, "loss": -0.00467388890683651, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.0876726396381855, "rewards/CineAccuracyORM/mean": 0.6770833395421505, "rewards/CineAccuracyORM/std": 0.30001722276210785, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3039, "train_speed(iter/s)": 0.027665 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 318.5, "completions/mean_length": 129.09375381469727, "completions/min_length": 38.25, "epoch": 4.530156366344006, "grad_norm": 0.7826733781033576, "kl": 0.255859375, "learning_rate": 5.821622415815537e-07, "loss": 0.020115038380026817, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.822916679084301, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3040, "train_speed(iter/s)": 0.027662 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.25, "completions/mean_length": 110.22916984558105, "completions/min_length": 44.5, "epoch": 4.531645569620253, "grad_norm": 2.1614293278221814, "kl": 0.39501953125, "learning_rate": 5.819289527469897e-07, "loss": 0.004361344501376152, "memory(GiB)": 112.53, "reward": 1.6354166865348816, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.6354166716337204, "rewards/CineAccuracyORM/std": 0.3546011596918106, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3041, "train_speed(iter/s)": 0.02766 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 263.5, "completions/mean_length": 115.04166984558105, "completions/min_length": 36.0, "epoch": 4.5331347728965, "grad_norm": 0.004586503443889707, "kl": 0.266845703125, "learning_rate": 5.81695645583399e-07, "loss": 0.00026634635287337005, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3042, "train_speed(iter/s)": 0.02766 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 236.0, "completions/mean_length": 106.90625381469727, "completions/min_length": 36.5, "epoch": 4.534623976172748, "grad_norm": 0.7203917688809254, "kl": 0.27099609375, "learning_rate": 5.814623201429771e-07, "loss": -0.0027702178340405226, "memory(GiB)": 112.53, "reward": 1.7812500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7812500074505806, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3043, "train_speed(iter/s)": 0.027663 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.25, "completions/mean_length": 105.14583396911621, "completions/min_length": 44.25, "epoch": 4.536113179448995, "grad_norm": 0.004500296121725115, "kl": 0.302734375, "learning_rate": 5.812289764779231e-07, "loss": 0.0003024611796718091, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3044, "train_speed(iter/s)": 0.027664 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.25, "completions/mean_length": 108.93750190734863, "completions/min_length": 50.5, "epoch": 4.537602382725242, "grad_norm": 0.00445253693283534, "kl": 0.2734375, "learning_rate": 5.809956146404402e-07, "loss": 0.00027395939105190337, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3045, "train_speed(iter/s)": 0.027664 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 240.25, "completions/mean_length": 111.48958587646484, "completions/min_length": 44.0, "epoch": 4.539091586001489, "grad_norm": 1.4317283079542176, "kl": 0.29833984375, "learning_rate": 5.807622346827361e-07, "loss": 0.004603441804647446, "memory(GiB)": 112.53, "reward": 1.6562500596046448, "reward_std": 0.0876726396381855, "rewards/CineAccuracyORM/mean": 0.6562500149011612, "rewards/CineAccuracyORM/std": 0.47217297554016113, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3046, "train_speed(iter/s)": 0.027664 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 236.25, "completions/mean_length": 105.22916984558105, "completions/min_length": 36.75, "epoch": 4.540580789277737, "grad_norm": 0.004260110099571559, "kl": 0.29541015625, "learning_rate": 5.80528836657022e-07, "loss": 0.000295419420581311, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3047, "train_speed(iter/s)": 0.027659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.5, "completions/mean_length": 113.22917175292969, "completions/min_length": 43.5, "epoch": 4.542069992553984, "grad_norm": 0.004464980664619312, "kl": 0.2880859375, "learning_rate": 5.802954206155133e-07, "loss": 0.00028780678985640407, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3048, "train_speed(iter/s)": 0.02766 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.5, "completions/mean_length": 102.80208587646484, "completions/min_length": 40.25, "epoch": 4.54355919583023, "grad_norm": 0.004693393908474357, "kl": 0.282470703125, "learning_rate": 5.800619866104295e-07, "loss": 0.0002827769494615495, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3049, "train_speed(iter/s)": 0.02766 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.0, "completions/mean_length": 101.86458587646484, "completions/min_length": 46.0, "epoch": 4.545048399106478, "grad_norm": 0.9764775866365272, "kl": 0.30322265625, "learning_rate": 5.798285346939942e-07, "loss": 0.0075222039595246315, "memory(GiB)": 112.53, "reward": 1.84375, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.84375, "rewards/CineAccuracyORM/std": 0.12363383919000626, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3050, "train_speed(iter/s)": 0.027658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.5, "completions/mean_length": 105.97916793823242, "completions/min_length": 37.75, "epoch": 4.546537602382725, "grad_norm": 0.004623156710438306, "kl": 0.27783203125, "learning_rate": 5.795950649184349e-07, "loss": 0.00027734972536563873, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3051, "train_speed(iter/s)": 0.02766 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.5, "completions/mean_length": 99.11458587646484, "completions/min_length": 35.0, "epoch": 4.5480268056589725, "grad_norm": 0.004745622821164138, "kl": 0.3037109375, "learning_rate": 5.793615773359832e-07, "loss": 0.00030325690750032663, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3052, "train_speed(iter/s)": 0.027663 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 229.25, "completions/mean_length": 101.34375190734863, "completions/min_length": 42.5, "epoch": 4.549516008935219, "grad_norm": 0.005645694100879795, "kl": 0.28515625, "learning_rate": 5.791280719988746e-07, "loss": 0.00028578273486346006, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3053, "train_speed(iter/s)": 0.027661 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.5, "completions/mean_length": 99.42708587646484, "completions/min_length": 40.75, "epoch": 4.551005212211467, "grad_norm": 0.00426183897168738, "kl": 0.30078125, "learning_rate": 5.788945489593485e-07, "loss": 0.00030084201716817915, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3054, "train_speed(iter/s)": 0.027657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.0, "completions/mean_length": 96.20833396911621, "completions/min_length": 40.25, "epoch": 4.552494415487714, "grad_norm": 0.004318606348591853, "kl": 0.283203125, "learning_rate": 5.786610082696481e-07, "loss": 0.0002833430771715939, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3055, "train_speed(iter/s)": 0.027658 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.0, "completions/mean_length": 96.72916793823242, "completions/min_length": 34.75, "epoch": 4.5539836187639615, "grad_norm": 0.0042546131409553975, "kl": 0.29052734375, "learning_rate": 5.784274499820213e-07, "loss": 0.00029042144888080657, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3056, "train_speed(iter/s)": 0.027659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.25, "completions/mean_length": 104.05208396911621, "completions/min_length": 37.25, "epoch": 4.555472822040208, "grad_norm": 0.004380815314293476, "kl": 0.29638671875, "learning_rate": 5.78193874148719e-07, "loss": 0.00029608490876853466, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3057, "train_speed(iter/s)": 0.027659 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 225.5, "completions/mean_length": 103.81250190734863, "completions/min_length": 41.0, "epoch": 4.556962025316456, "grad_norm": 1.3787554582348465, "kl": 0.287109375, "learning_rate": 5.779602808219968e-07, "loss": -0.00757987005636096, "memory(GiB)": 112.53, "reward": 1.6250000596046448, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.6250000223517418, "rewards/CineAccuracyORM/std": 0.45633094012737274, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3058, "train_speed(iter/s)": 0.027657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 223.75, "completions/mean_length": 96.15625190734863, "completions/min_length": 50.75, "epoch": 4.558451228592703, "grad_norm": 0.0043035791618073, "kl": 0.3212890625, "learning_rate": 5.777266700541134e-07, "loss": 0.0003213061427231878, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3059, "train_speed(iter/s)": 0.027655 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.25, "completions/mean_length": 96.59375190734863, "completions/min_length": 39.75, "epoch": 4.5599404318689505, "grad_norm": 2.061046008298287, "kl": 0.32568359375, "learning_rate": 5.774930418973322e-07, "loss": -0.012263174168765545, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.08625819534063339, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.35134297609329224, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3060, "train_speed(iter/s)": 0.027657 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 240.5, "completions/mean_length": 99.63541793823242, "completions/min_length": 49.75, "epoch": 4.561429635145197, "grad_norm": 0.005075634842514134, "kl": 0.29443359375, "learning_rate": 5.772593964039203e-07, "loss": 0.0002944814623333514, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3061, "train_speed(iter/s)": 0.027654 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.5, "completions/mean_length": 98.69791984558105, "completions/min_length": 28.0, "epoch": 4.562918838421444, "grad_norm": 0.004320728321130593, "kl": 0.2978515625, "learning_rate": 5.770257336261481e-07, "loss": 0.0002972368965856731, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3062, "train_speed(iter/s)": 0.027649 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 184.75, "completions/mean_length": 89.14583587646484, "completions/min_length": 34.0, "epoch": 4.564408041697692, "grad_norm": 0.7588653047956573, "kl": 0.34228515625, "learning_rate": 5.767920536162905e-07, "loss": 0.0042694187723100185, "memory(GiB)": 112.53, "reward": 1.760416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166865348816, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3063, "train_speed(iter/s)": 0.02765 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.0, "completions/mean_length": 95.82291984558105, "completions/min_length": 48.0, "epoch": 4.565897244973939, "grad_norm": 0.004224702918494795, "kl": 0.31103515625, "learning_rate": 5.76558356426626e-07, "loss": 0.0003109616518486291, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3064, "train_speed(iter/s)": 0.027651 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.5, "completions/mean_length": 105.57292175292969, "completions/min_length": 51.0, "epoch": 4.567386448250186, "grad_norm": 0.9151980826366743, "kl": 0.27978515625, "learning_rate": 5.763246421094372e-07, "loss": 0.021056465804576874, "memory(GiB)": 112.53, "reward": 1.7916666865348816, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.24807433784008026, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3065, "train_speed(iter/s)": 0.027651 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.0, "completions/mean_length": 90.64583587646484, "completions/min_length": 41.5, "epoch": 4.568875651526433, "grad_norm": 0.6827405693620988, "kl": 0.31982421875, "learning_rate": 5.760909107170103e-07, "loss": -0.006030015181750059, "memory(GiB)": 112.53, "reward": 1.6145833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6145833432674408, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3066, "train_speed(iter/s)": 0.027652 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 201.25, "completions/mean_length": 93.22916984558105, "completions/min_length": 41.5, "epoch": 4.570364854802681, "grad_norm": 0.004256511987802193, "kl": 0.3076171875, "learning_rate": 5.75857162301635e-07, "loss": 0.0003075114800594747, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3067, "train_speed(iter/s)": 0.027652 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.75, "completions/mean_length": 103.70833587646484, "completions/min_length": 49.5, "epoch": 4.5718540580789275, "grad_norm": 0.7931880031581882, "kl": 0.27783203125, "learning_rate": 5.756233969156055e-07, "loss": 0.009776686318218708, "memory(GiB)": 112.53, "reward": 1.9895833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9895833432674408, "rewards/CineAccuracyORM/std": 0.05103103443980217, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3068, "train_speed(iter/s)": 0.027651 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.0, "completions/mean_length": 93.54167175292969, "completions/min_length": 39.25, "epoch": 4.573343261355175, "grad_norm": 0.005446233705774478, "kl": 0.29833984375, "learning_rate": 5.753896146112191e-07, "loss": 0.00029787456151098013, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3069, "train_speed(iter/s)": 0.027652 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 258.0, "completions/mean_length": 106.95833587646484, "completions/min_length": 49.5, "epoch": 4.574832464631422, "grad_norm": 0.004370841003527892, "kl": 0.289794921875, "learning_rate": 5.751558154407777e-07, "loss": 0.00029003166127949953, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3070, "train_speed(iter/s)": 0.027649 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.25, "completions/mean_length": 106.98958587646484, "completions/min_length": 42.5, "epoch": 4.57632166790767, "grad_norm": 0.004166605508001769, "kl": 0.27392578125, "learning_rate": 5.749219994565863e-07, "loss": 0.00027405243599787354, "memory(GiB)": 112.53, "reward": 1.416666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.416666679084301, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3071, "train_speed(iter/s)": 0.027647 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.25, "completions/mean_length": 92.28125190734863, "completions/min_length": 43.0, "epoch": 4.5778108711839165, "grad_norm": 0.004695213197855411, "kl": 0.3076171875, "learning_rate": 5.746881667109538e-07, "loss": 0.0003079138696193695, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3072, "train_speed(iter/s)": 0.027647 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 277.5, "completions/mean_length": 108.43750190734863, "completions/min_length": 46.0, "epoch": 4.579300074460164, "grad_norm": 1.6611790189696771, "kl": 0.29296875, "learning_rate": 5.74454317256193e-07, "loss": 0.004627647344022989, "memory(GiB)": 112.53, "reward": 1.7812500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7812500074505806, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3073, "train_speed(iter/s)": 0.027642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 264.0, "completions/mean_length": 100.75000381469727, "completions/min_length": 36.5, "epoch": 4.580789277736411, "grad_norm": 0.004445376662041463, "kl": 0.2861328125, "learning_rate": 5.742204511446203e-07, "loss": 0.0002858815423678607, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3074, "train_speed(iter/s)": 0.027642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 206.75, "completions/mean_length": 90.76041984558105, "completions/min_length": 40.25, "epoch": 4.582278481012658, "grad_norm": 0.004687602520166343, "kl": 0.30517578125, "learning_rate": 5.73986568428556e-07, "loss": 0.0003047608770430088, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3075, "train_speed(iter/s)": 0.027645 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 204.5, "completions/mean_length": 97.08333587646484, "completions/min_length": 38.5, "epoch": 4.5837676842889055, "grad_norm": 0.00408070731888331, "kl": 0.30517578125, "learning_rate": 5.737526691603237e-07, "loss": 0.00030510290525853634, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3076, "train_speed(iter/s)": 0.027648 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.0, "completions/mean_length": 95.35416984558105, "completions/min_length": 44.0, "epoch": 4.585256887565152, "grad_norm": 0.009213985499585154, "kl": 0.33154296875, "learning_rate": 5.735187533922516e-07, "loss": 0.00033195922151207924, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3077, "train_speed(iter/s)": 0.027647 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 214.0, "completions/mean_length": 100.97916793823242, "completions/min_length": 44.75, "epoch": 4.5867460908414, "grad_norm": 1.109517615833619, "kl": 0.2919921875, "learning_rate": 5.732848211766704e-07, "loss": -0.00036190450191497803, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3078, "train_speed(iter/s)": 0.027647 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.5, "completions/mean_length": 101.25000190734863, "completions/min_length": 50.5, "epoch": 4.588235294117647, "grad_norm": 0.7595694444211112, "kl": 0.314453125, "learning_rate": 5.730508725659154e-07, "loss": 0.0070734466426074505, "memory(GiB)": 112.53, "reward": 1.8541666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.12590253353118896, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3079, "train_speed(iter/s)": 0.027646 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 236.75, "completions/mean_length": 105.91667175292969, "completions/min_length": 44.5, "epoch": 4.589724497393894, "grad_norm": 0.005244941840632754, "kl": 0.29638671875, "learning_rate": 5.728169076123251e-07, "loss": 0.00029621212161146104, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3080, "train_speed(iter/s)": 0.027646 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 235.75, "completions/mean_length": 103.42708396911621, "completions/min_length": 50.5, "epoch": 4.591213700670141, "grad_norm": 1.891952246932922, "kl": 0.29345703125, "learning_rate": 5.725829263682419e-07, "loss": 0.0050641195848584175, "memory(GiB)": 112.53, "reward": 1.96875, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.96875, "rewards/CineAccuracyORM/std": 0.08445799350738525, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3081, "train_speed(iter/s)": 0.027642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 266.5, "completions/mean_length": 101.92708587646484, "completions/min_length": 45.0, "epoch": 4.592702903946389, "grad_norm": 0.8395828729878053, "kl": 0.29833984375, "learning_rate": 5.723489288860116e-07, "loss": 0.028336193412542343, "memory(GiB)": 112.53, "reward": 1.6250000596046448, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.6250000223517418, "rewards/CineAccuracyORM/std": 0.45633094012737274, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3082, "train_speed(iter/s)": 0.027642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.75, "completions/mean_length": 91.53125190734863, "completions/min_length": 42.0, "epoch": 4.594192107222636, "grad_norm": 0.00435341214651865, "kl": 0.29931640625, "learning_rate": 5.721149152179839e-07, "loss": 0.00029971200274303555, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3083, "train_speed(iter/s)": 0.027642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 337.0, "completions/mean_length": 112.70833778381348, "completions/min_length": 44.25, "epoch": 4.595681310498883, "grad_norm": 0.004104035452389078, "kl": 0.2734375, "learning_rate": 5.71880885416512e-07, "loss": 0.00027341971872374415, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3084, "train_speed(iter/s)": 0.027638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 201.5, "completions/mean_length": 98.27083587646484, "completions/min_length": 49.0, "epoch": 4.59717051377513, "grad_norm": 0.0041983847701246605, "kl": 0.3134765625, "learning_rate": 5.716468395339529e-07, "loss": 0.0003126544470433146, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3085, "train_speed(iter/s)": 0.027641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.0, "completions/mean_length": 88.69791984558105, "completions/min_length": 32.75, "epoch": 4.598659717051378, "grad_norm": 0.0037580541329201842, "kl": 0.314453125, "learning_rate": 5.714127776226666e-07, "loss": 0.00031453886185772717, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3086, "train_speed(iter/s)": 0.02764 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.25, "completions/mean_length": 89.47916984558105, "completions/min_length": 40.75, "epoch": 4.600148920327625, "grad_norm": 0.004158646515326495, "kl": 0.29931640625, "learning_rate": 5.711786997350173e-07, "loss": 0.00029886484844610095, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3087, "train_speed(iter/s)": 0.027644 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.5, "completions/mean_length": 97.92708778381348, "completions/min_length": 31.25, "epoch": 4.6016381236038715, "grad_norm": 0.004834719535776556, "kl": 0.291015625, "learning_rate": 5.709446059233725e-07, "loss": 0.00029152067145332694, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3088, "train_speed(iter/s)": 0.027644 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.75, "completions/mean_length": 93.57291984558105, "completions/min_length": 40.0, "epoch": 4.603127326880119, "grad_norm": 0.0040687146278018714, "kl": 0.32568359375, "learning_rate": 5.707104962401033e-07, "loss": 0.0003251209273003042, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3089, "train_speed(iter/s)": 0.02764 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 243.75, "completions/mean_length": 97.19791984558105, "completions/min_length": 39.5, "epoch": 4.604616530156366, "grad_norm": 0.003846547165719283, "kl": 0.30322265625, "learning_rate": 5.704763707375846e-07, "loss": 0.0003029271902050823, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3090, "train_speed(iter/s)": 0.02764 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.25, "completions/mean_length": 97.12500190734863, "completions/min_length": 33.75, "epoch": 4.606105733432614, "grad_norm": 1.0164693980710109, "kl": 0.31298828125, "learning_rate": 5.702422294681943e-07, "loss": -0.018008800223469734, "memory(GiB)": 112.53, "reward": 1.8645833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8645833432674408, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3091, "train_speed(iter/s)": 0.027641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.25, "completions/mean_length": 98.14583587646484, "completions/min_length": 46.25, "epoch": 4.6075949367088604, "grad_norm": 0.006365028651291597, "kl": 0.3125, "learning_rate": 5.700080724843146e-07, "loss": 0.00031249839230440557, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3092, "train_speed(iter/s)": 0.027637 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 195.0, "completions/mean_length": 101.56250190734863, "completions/min_length": 48.75, "epoch": 4.609084139985108, "grad_norm": 1.2665841805604834, "kl": 0.2783203125, "learning_rate": 5.697738998383302e-07, "loss": -0.0005762053187936544, "memory(GiB)": 112.53, "reward": 1.541666716337204, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.541666679084301, "rewards/CineAccuracyORM/std": 0.4888460487127304, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3093, "train_speed(iter/s)": 0.02764 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.5, "completions/mean_length": 88.23958587646484, "completions/min_length": 38.0, "epoch": 4.610573343261355, "grad_norm": 1.1466121242718141, "kl": 0.29638671875, "learning_rate": 5.695397115826303e-07, "loss": -0.01908187009394169, "memory(GiB)": 112.53, "reward": 1.822916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3094, "train_speed(iter/s)": 0.027643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 237.75, "completions/mean_length": 102.31250190734863, "completions/min_length": 33.0, "epoch": 4.612062546537603, "grad_norm": 1.0548123917863153, "kl": 0.271240234375, "learning_rate": 5.693055077696068e-07, "loss": -0.0047218152321875095, "memory(GiB)": 112.53, "reward": 1.729166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.729166679084301, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3095, "train_speed(iter/s)": 0.027642 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.75, "completions/mean_length": 97.19791984558105, "completions/min_length": 41.0, "epoch": 4.613551749813849, "grad_norm": 0.8805378897990274, "kl": 0.306640625, "learning_rate": 5.690712884516559e-07, "loss": 0.011951342225074768, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3096, "train_speed(iter/s)": 0.027643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 241.25, "completions/mean_length": 100.88541793823242, "completions/min_length": 44.75, "epoch": 4.615040953090097, "grad_norm": 0.004213334463319083, "kl": 0.30078125, "learning_rate": 5.688370536811764e-07, "loss": 0.000300124054774642, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3097, "train_speed(iter/s)": 0.027641 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.75, "completions/mean_length": 84.39583587646484, "completions/min_length": 38.5, "epoch": 4.616530156366344, "grad_norm": 1.5775675301239, "kl": 0.30712890625, "learning_rate": 5.68602803510571e-07, "loss": 0.0047623757272958755, "memory(GiB)": 112.53, "reward": 1.7916666865348816, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.24807433784008026, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3098, "train_speed(iter/s)": 0.027643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.0, "completions/mean_length": 86.30208587646484, "completions/min_length": 44.0, "epoch": 4.618019359642592, "grad_norm": 1.0601485043201644, "kl": 0.30517578125, "learning_rate": 5.683685379922461e-07, "loss": -0.006991981528699398, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.822916679084301, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3099, "train_speed(iter/s)": 0.027647 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.0, "completions/mean_length": 92.20833587646484, "completions/min_length": 37.75, "epoch": 4.619508562918838, "grad_norm": 0.0044830167987214625, "kl": 0.3125, "learning_rate": 5.68134257178611e-07, "loss": 0.0003118220192845911, "memory(GiB)": 112.53, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3100, "train_speed(iter/s)": 0.027647 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.0, "completions/mean_length": 94.37500381469727, "completions/min_length": 44.25, "epoch": 4.620997766195085, "grad_norm": 1.2929978293813298, "kl": 0.29345703125, "learning_rate": 5.678999611220786e-07, "loss": -0.005591576918959618, "memory(GiB)": 112.53, "reward": 1.5520834028720856, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.5520833507180214, "rewards/CineAccuracyORM/std": 0.4884019047021866, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3101, "train_speed(iter/s)": 0.027643 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.25, "completions/mean_length": 97.17708396911621, "completions/min_length": 38.75, "epoch": 4.622486969471333, "grad_norm": 0.7681380604829823, "kl": 0.31494140625, "learning_rate": 5.676656498750656e-07, "loss": 0.024158736690878868, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3102, "train_speed(iter/s)": 0.027637 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.0, "completions/mean_length": 96.26041793823242, "completions/min_length": 39.25, "epoch": 4.62397617274758, "grad_norm": 0.004218982903535503, "kl": 0.28857421875, "learning_rate": 5.674313234899914e-07, "loss": 0.00028898747405037284, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3103, "train_speed(iter/s)": 0.027638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 206.5, "completions/mean_length": 97.87500381469727, "completions/min_length": 48.5, "epoch": 4.625465376023827, "grad_norm": 0.004275918076110476, "kl": 0.30712890625, "learning_rate": 5.671969820192793e-07, "loss": 0.0003073000116273761, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3104, "train_speed(iter/s)": 0.027636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 204.0, "completions/mean_length": 90.26041984558105, "completions/min_length": 44.5, "epoch": 4.626954579300074, "grad_norm": 2.3201269895981307, "kl": 0.27392578125, "learning_rate": 5.66962625515356e-07, "loss": 0.009616868570446968, "memory(GiB)": 112.53, "reward": 1.6875000596046448, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.6875000149011612, "rewards/CineAccuracyORM/std": 0.4681183323264122, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3105, "train_speed(iter/s)": 0.027638 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 264.75, "completions/mean_length": 107.31250190734863, "completions/min_length": 46.0, "epoch": 4.628443782576322, "grad_norm": 0.004671273770180653, "kl": 0.28857421875, "learning_rate": 5.667282540306513e-07, "loss": 0.00028859719168394804, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3106, "train_speed(iter/s)": 0.027636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.25, "completions/mean_length": 87.75000190734863, "completions/min_length": 51.0, "epoch": 4.629932985852569, "grad_norm": 0.003953182839831478, "kl": 0.314453125, "learning_rate": 5.664938676175981e-07, "loss": 0.0003142441564705223, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3107, "train_speed(iter/s)": 0.027637 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.5, "completions/mean_length": 84.02083587646484, "completions/min_length": 40.0, "epoch": 4.631422189128816, "grad_norm": 0.00438028523006285, "kl": 0.3271484375, "learning_rate": 5.662594663286334e-07, "loss": 0.00032776343869045377, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3108, "train_speed(iter/s)": 0.02764 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.25, "completions/mean_length": 92.29166793823242, "completions/min_length": 38.75, "epoch": 4.632911392405063, "grad_norm": 0.0041025618552387455, "kl": 0.2861328125, "learning_rate": 5.660250502161972e-07, "loss": 0.0002858696971088648, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3109, "train_speed(iter/s)": 0.027636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.25, "completions/mean_length": 97.30208778381348, "completions/min_length": 40.25, "epoch": 4.634400595681311, "grad_norm": 0.003832857583025321, "kl": 0.29248046875, "learning_rate": 5.657906193327324e-07, "loss": 0.0002924237633123994, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3110, "train_speed(iter/s)": 0.027634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 220.5, "completions/mean_length": 98.81250190734863, "completions/min_length": 48.5, "epoch": 4.635889798957558, "grad_norm": 1.0950598599290056, "kl": 0.29296875, "learning_rate": 5.655561737306857e-07, "loss": -0.00409359997138381, "memory(GiB)": 112.53, "reward": 1.5312500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.5312500149011612, "rewards/CineAccuracyORM/std": 0.34448449313640594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3111, "train_speed(iter/s)": 0.027632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 268.0, "completions/mean_length": 105.56250190734863, "completions/min_length": 48.5, "epoch": 4.637379002233805, "grad_norm": 0.03529582809019325, "kl": 0.310546875, "learning_rate": 5.653217134625068e-07, "loss": 0.0003106047515757382, "memory(GiB)": 112.53, "reward": 1.416666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.416666679084301, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3112, "train_speed(iter/s)": 0.027632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.75, "completions/mean_length": 92.04166984558105, "completions/min_length": 37.25, "epoch": 4.638868205510052, "grad_norm": 0.5843093284239516, "kl": 0.29931640625, "learning_rate": 5.650872385806491e-07, "loss": 0.013277745805680752, "memory(GiB)": 112.53, "reward": 1.8437500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3113, "train_speed(iter/s)": 0.02763 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 179.75, "completions/mean_length": 87.86458778381348, "completions/min_length": 32.5, "epoch": 4.640357408786299, "grad_norm": 1.703938892973638, "kl": 0.31494140625, "learning_rate": 5.648527491375686e-07, "loss": -0.005362404510378838, "memory(GiB)": 112.53, "reward": 1.8020833432674408, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.20484384894371033, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3114, "train_speed(iter/s)": 0.027631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.25, "completions/mean_length": 92.19791984558105, "completions/min_length": 41.5, "epoch": 4.641846612062547, "grad_norm": 1.0719523035565728, "kl": 0.30126953125, "learning_rate": 5.646182451857253e-07, "loss": 0.020889170467853546, "memory(GiB)": 112.53, "reward": 1.7812500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7812500074505806, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3115, "train_speed(iter/s)": 0.027627 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 184.0, "completions/mean_length": 93.89583396911621, "completions/min_length": 45.25, "epoch": 4.643335815338793, "grad_norm": 0.003831470032305054, "kl": 0.32177734375, "learning_rate": 5.64383726777582e-07, "loss": 0.00032161938725039363, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3116, "train_speed(iter/s)": 0.027623 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 202.0, "completions/mean_length": 93.25000381469727, "completions/min_length": 38.0, "epoch": 4.644825018615041, "grad_norm": 0.004430074122458593, "kl": 0.3037109375, "learning_rate": 5.641491939656045e-07, "loss": 0.0003036551643162966, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3117, "train_speed(iter/s)": 0.027624 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.0, "completions/mean_length": 92.25000190734863, "completions/min_length": 39.25, "epoch": 4.646314221891288, "grad_norm": 0.004108696854022193, "kl": 0.2939453125, "learning_rate": 5.639146468022624e-07, "loss": 0.00029367522802203894, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3118, "train_speed(iter/s)": 0.027627 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.0, "completions/mean_length": 91.00000190734863, "completions/min_length": 41.0, "epoch": 4.6478034251675355, "grad_norm": 0.00426474047096489, "kl": 0.2978515625, "learning_rate": 5.636800853400284e-07, "loss": 0.00029756370349787176, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3119, "train_speed(iter/s)": 0.02763 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.5, "completions/mean_length": 91.59375190734863, "completions/min_length": 36.75, "epoch": 4.649292628443782, "grad_norm": 1.0271653111962096, "kl": 0.29541015625, "learning_rate": 5.634455096313779e-07, "loss": 0.015928689390420914, "memory(GiB)": 112.53, "reward": 1.7708333432674408, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333358168602, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3120, "train_speed(iter/s)": 0.027629 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.0, "completions/mean_length": 88.78125381469727, "completions/min_length": 34.25, "epoch": 4.65078183172003, "grad_norm": 0.004126518862946671, "kl": 0.3125, "learning_rate": 5.632109197287902e-07, "loss": 0.0003125021466985345, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3121, "train_speed(iter/s)": 0.027625 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.0, "completions/mean_length": 92.77083587646484, "completions/min_length": 43.25, "epoch": 4.652271034996277, "grad_norm": 0.004487259697900683, "kl": 0.2939453125, "learning_rate": 5.62976315684747e-07, "loss": 0.0002941047423519194, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3122, "train_speed(iter/s)": 0.027628 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.25, "completions/mean_length": 92.05208396911621, "completions/min_length": 44.5, "epoch": 4.6537602382725245, "grad_norm": 0.004479603891109682, "kl": 0.3193359375, "learning_rate": 5.62741697551734e-07, "loss": 0.0003196262987330556, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3123, "train_speed(iter/s)": 0.027627 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 235.25, "completions/mean_length": 94.12500381469727, "completions/min_length": 43.25, "epoch": 4.655249441548771, "grad_norm": 1.787960194141161, "kl": 0.28662109375, "learning_rate": 5.625070653822394e-07, "loss": 0.008428247645497322, "memory(GiB)": 112.53, "reward": 1.854166716337204, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.8541666865348816, "rewards/CineAccuracyORM/std": 0.27512967213988304, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3124, "train_speed(iter/s)": 0.027627 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 260.75, "completions/mean_length": 104.48958587646484, "completions/min_length": 45.75, "epoch": 4.656738644825019, "grad_norm": 1.3487331540442535, "kl": 0.3603515625, "learning_rate": 5.622724192287548e-07, "loss": 0.010379520244896412, "memory(GiB)": 112.53, "reward": 1.604166716337204, "reward_std": 0.1034691073000431, "rewards/CineAccuracyORM/mean": 0.6145833507180214, "rewards/CineAccuracyORM/std": 0.4595789238810539, "rewards/Format/mean": 0.9895833432674408, "rewards/Format/std": 0.05103103443980217, "step": 3125, "train_speed(iter/s)": 0.027629 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 142.5, "completions/mean_length": 84.89583587646484, "completions/min_length": 31.75, "epoch": 4.658227848101266, "grad_norm": 0.8173340302111225, "kl": 0.32958984375, "learning_rate": 5.620377591437748e-07, "loss": -0.011955278925597668, "memory(GiB)": 112.53, "reward": 1.8125000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3126, "train_speed(iter/s)": 0.027631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.75, "completions/mean_length": 86.72916984558105, "completions/min_length": 41.0, "epoch": 4.659717051377513, "grad_norm": 1.5406003962052497, "kl": 0.306640625, "learning_rate": 5.618030851797974e-07, "loss": 0.007122817914932966, "memory(GiB)": 112.53, "reward": 1.8437500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3127, "train_speed(iter/s)": 0.027632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.25, "completions/mean_length": 88.18750381469727, "completions/min_length": 48.5, "epoch": 4.66120625465376, "grad_norm": 0.004308632456977978, "kl": 0.31201171875, "learning_rate": 5.615683973893234e-07, "loss": 0.00031216791830956936, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3128, "train_speed(iter/s)": 0.02763 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.25, "completions/mean_length": 94.22916984558105, "completions/min_length": 41.75, "epoch": 4.662695457930007, "grad_norm": 2.7217022946119593, "kl": 0.322265625, "learning_rate": 5.613336958248569e-07, "loss": 0.0037064380012452602, "memory(GiB)": 112.53, "reward": 1.6458333730697632, "reward_std": 0.17606099508702755, "rewards/CineAccuracyORM/mean": 0.645833358168602, "rewards/CineAccuracyORM/std": 0.39383460208773613, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3129, "train_speed(iter/s)": 0.027628 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 204.5, "completions/mean_length": 90.12500190734863, "completions/min_length": 37.0, "epoch": 4.664184661206255, "grad_norm": 0.004158670330201678, "kl": 0.3125, "learning_rate": 5.610989805389049e-07, "loss": 0.000312839139951393, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3130, "train_speed(iter/s)": 0.027628 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.0, "completions/mean_length": 96.85416984558105, "completions/min_length": 44.5, "epoch": 4.6656738644825015, "grad_norm": 0.003976889848929091, "kl": 0.30615234375, "learning_rate": 5.608642515839777e-07, "loss": 0.000305758323520422, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3131, "train_speed(iter/s)": 0.02763 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.25, "completions/mean_length": 95.57291793823242, "completions/min_length": 45.0, "epoch": 4.667163067758749, "grad_norm": 0.004391029487832941, "kl": 0.32470703125, "learning_rate": 5.606295090125882e-07, "loss": 0.00032453573658131063, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3132, "train_speed(iter/s)": 0.02763 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.75, "completions/mean_length": 92.52083587646484, "completions/min_length": 46.25, "epoch": 4.668652271034996, "grad_norm": 0.004281655496742654, "kl": 0.3212890625, "learning_rate": 5.603947528772531e-07, "loss": 0.00032111734617501497, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3133, "train_speed(iter/s)": 0.027631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 262.25, "completions/mean_length": 99.45833587646484, "completions/min_length": 37.5, "epoch": 4.670141474311244, "grad_norm": 0.655624645230177, "kl": 0.2919921875, "learning_rate": 5.601599832304914e-07, "loss": -0.000594619195908308, "memory(GiB)": 112.53, "reward": 1.5729166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.572916679084301, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3134, "train_speed(iter/s)": 0.02763 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 186.5, "completions/mean_length": 98.73958778381348, "completions/min_length": 37.75, "epoch": 4.6716306775874905, "grad_norm": 0.8720380165614918, "kl": 0.309326171875, "learning_rate": 5.599252001248256e-07, "loss": -0.004331556148827076, "memory(GiB)": 112.53, "reward": 1.541666716337204, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.541666679084301, "rewards/CineAccuracyORM/std": 0.4888460487127304, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3135, "train_speed(iter/s)": 0.027632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.75, "completions/mean_length": 97.85416793823242, "completions/min_length": 41.5, "epoch": 4.673119880863738, "grad_norm": 0.004003813806082208, "kl": 0.281494140625, "learning_rate": 5.596904036127807e-07, "loss": 0.0002813737082760781, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3136, "train_speed(iter/s)": 0.027634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 276.75, "completions/mean_length": 99.39583778381348, "completions/min_length": 42.75, "epoch": 4.674609084139985, "grad_norm": 0.004862119950528106, "kl": 0.3046875, "learning_rate": 5.594555937468856e-07, "loss": 0.0003044994955416769, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3137, "train_speed(iter/s)": 0.027632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 196.5, "completions/mean_length": 93.62500381469727, "completions/min_length": 37.25, "epoch": 4.676098287416233, "grad_norm": 0.0042306623409286235, "kl": 0.28125, "learning_rate": 5.592207705796712e-07, "loss": 0.0002807436976581812, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3138, "train_speed(iter/s)": 0.027632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.0, "completions/mean_length": 91.02083396911621, "completions/min_length": 39.75, "epoch": 4.6775874906924795, "grad_norm": 0.004228942850547844, "kl": 0.31005859375, "learning_rate": 5.589859341636718e-07, "loss": 0.00031014857813715935, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3139, "train_speed(iter/s)": 0.027635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 204.0, "completions/mean_length": 92.31250381469727, "completions/min_length": 36.25, "epoch": 4.679076693968726, "grad_norm": 0.00421358584976435, "kl": 0.322265625, "learning_rate": 5.587510845514249e-07, "loss": 0.0003220434591639787, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3140, "train_speed(iter/s)": 0.027633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.25, "completions/mean_length": 98.86458587646484, "completions/min_length": 43.25, "epoch": 4.680565897244974, "grad_norm": 0.004419609278579315, "kl": 0.294921875, "learning_rate": 5.585162217954705e-07, "loss": 0.00029446021653711796, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3141, "train_speed(iter/s)": 0.027634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 186.0, "completions/mean_length": 98.57291984558105, "completions/min_length": 47.5, "epoch": 4.682055100521221, "grad_norm": 0.13420752933872615, "kl": 0.3173828125, "learning_rate": 5.582813459483519e-07, "loss": 0.00031746161403134465, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3142, "train_speed(iter/s)": 0.027637 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.75, "completions/mean_length": 93.52083587646484, "completions/min_length": 43.75, "epoch": 4.6835443037974684, "grad_norm": 0.004604992145810071, "kl": 0.2890625, "learning_rate": 5.580464570626151e-07, "loss": 0.0002891029289457947, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3143, "train_speed(iter/s)": 0.027637 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.75, "completions/mean_length": 84.26041793823242, "completions/min_length": 39.5, "epoch": 4.685033507073715, "grad_norm": 0.004714240737648929, "kl": 0.30908203125, "learning_rate": 5.578115551908093e-07, "loss": 0.00030883910949341953, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3144, "train_speed(iter/s)": 0.027636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.0, "completions/mean_length": 95.90625190734863, "completions/min_length": 38.75, "epoch": 4.686522710349963, "grad_norm": 1.0700173978810612, "kl": 0.30615234375, "learning_rate": 5.575766403854859e-07, "loss": 0.001665587886236608, "memory(GiB)": 112.53, "reward": 1.5520833432674408, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.5520833358168602, "rewards/CineAccuracyORM/std": 0.20484384894371033, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3145, "train_speed(iter/s)": 0.027634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 195.5, "completions/mean_length": 92.29166984558105, "completions/min_length": 39.25, "epoch": 4.68801191362621, "grad_norm": 0.005472916174303163, "kl": 0.31005859375, "learning_rate": 5.573417126992002e-07, "loss": 0.0003095996507909149, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3146, "train_speed(iter/s)": 0.027635 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 185.75, "completions/mean_length": 89.34375381469727, "completions/min_length": 43.5, "epoch": 4.689501116902457, "grad_norm": 0.006998616188422032, "kl": 0.31201171875, "learning_rate": 5.571067721845096e-07, "loss": 0.0003113666025456041, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3147, "train_speed(iter/s)": 0.027636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.5, "completions/mean_length": 93.77083587646484, "completions/min_length": 50.25, "epoch": 4.690990320178704, "grad_norm": 0.004975625719906951, "kl": 0.31396484375, "learning_rate": 5.568718188939749e-07, "loss": 0.000313355412799865, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3148, "train_speed(iter/s)": 0.027634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.75, "completions/mean_length": 92.44791984558105, "completions/min_length": 37.75, "epoch": 4.692479523454952, "grad_norm": 0.004510444726096678, "kl": 0.2939453125, "learning_rate": 5.566368528801595e-07, "loss": 0.00029334169812500477, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3149, "train_speed(iter/s)": 0.027634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 179.25, "completions/mean_length": 90.20833587646484, "completions/min_length": 46.5, "epoch": 4.693968726731199, "grad_norm": 0.005679881185012703, "kl": 0.31494140625, "learning_rate": 5.564018741956295e-07, "loss": 0.0003147277166135609, "memory(GiB)": 112.53, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3150, "train_speed(iter/s)": 0.027637 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 191.0, "completions/mean_length": 91.90625381469727, "completions/min_length": 50.0, "epoch": 4.695457930007446, "grad_norm": 0.004347026343858979, "kl": 0.3310546875, "learning_rate": 5.561668828929538e-07, "loss": 0.0003310296160634607, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3151, "train_speed(iter/s)": 0.027636 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.5, "completions/mean_length": 95.58333587646484, "completions/min_length": 43.5, "epoch": 4.696947133283693, "grad_norm": 0.774706235394054, "kl": 0.3154296875, "learning_rate": 5.559318790247046e-07, "loss": 0.002223848830908537, "memory(GiB)": 112.53, "reward": 1.7812500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7812500074505806, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3152, "train_speed(iter/s)": 0.027632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 238.0, "completions/mean_length": 98.32291793823242, "completions/min_length": 44.5, "epoch": 4.69843633655994, "grad_norm": 0.004424541487643223, "kl": 0.2890625, "learning_rate": 5.556968626434565e-07, "loss": 0.0002889474271796644, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3153, "train_speed(iter/s)": 0.027632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.75, "completions/mean_length": 87.96875190734863, "completions/min_length": 38.75, "epoch": 4.699925539836188, "grad_norm": 0.5870830814291473, "kl": 0.31494140625, "learning_rate": 5.554618338017873e-07, "loss": -0.006800379138439894, "memory(GiB)": 112.53, "reward": 1.572916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5729166865348816, "rewards/CineAccuracyORM/std": 0.4121886007487774, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3154, "train_speed(iter/s)": 0.027633 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 241.5, "completions/mean_length": 99.50000381469727, "completions/min_length": 47.0, "epoch": 4.701414743112435, "grad_norm": 1.5306959104180966, "kl": 0.3125, "learning_rate": 5.552267925522769e-07, "loss": -0.002682241378352046, "memory(GiB)": 112.53, "reward": 1.6041666865348816, "reward_std": 0.0725918859243393, "rewards/CineAccuracyORM/mean": 0.6041666716337204, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3155, "train_speed(iter/s)": 0.027631 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 199.75, "completions/mean_length": 93.25000190734863, "completions/min_length": 38.5, "epoch": 4.702903946388682, "grad_norm": 0.004688970367752276, "kl": 0.3017578125, "learning_rate": 5.549917389475086e-07, "loss": 0.0003014977555721998, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3156, "train_speed(iter/s)": 0.027634 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.5, "completions/mean_length": 105.66666793823242, "completions/min_length": 39.0, "epoch": 4.704393149664929, "grad_norm": 0.005290129685583653, "kl": 0.28271484375, "learning_rate": 5.547566730400683e-07, "loss": 0.0002820183872245252, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3157, "train_speed(iter/s)": 0.027632 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.25, "completions/mean_length": 92.63541984558105, "completions/min_length": 36.25, "epoch": 4.705882352941177, "grad_norm": 1.1683942773316593, "kl": 0.31884765625, "learning_rate": 5.545215948825446e-07, "loss": 0.016508422791957855, "memory(GiB)": 112.53, "reward": 1.7083334028720856, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.708333358168602, "rewards/CineAccuracyORM/std": 0.45633094012737274, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3158, "train_speed(iter/s)": 0.027628 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.75, "completions/mean_length": 88.30208778381348, "completions/min_length": 38.25, "epoch": 4.707371556217423, "grad_norm": 0.004549269146520237, "kl": 0.3154296875, "learning_rate": 5.542865045275285e-07, "loss": 0.00031519983895123005, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3159, "train_speed(iter/s)": 0.027626 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 265.25, "completions/mean_length": 92.62500190734863, "completions/min_length": 36.0, "epoch": 4.708860759493671, "grad_norm": 0.004422498775157734, "kl": 0.29150390625, "learning_rate": 5.540514020276146e-07, "loss": 0.00029182338039390743, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3160, "train_speed(iter/s)": 0.027626 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.25, "completions/mean_length": 84.48958778381348, "completions/min_length": 34.0, "epoch": 4.710349962769918, "grad_norm": 0.008538146632799311, "kl": 0.328125, "learning_rate": 5.538162874353993e-07, "loss": 0.0003282455145381391, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3161, "train_speed(iter/s)": 0.027629 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 234.75, "completions/mean_length": 97.87500381469727, "completions/min_length": 38.25, "epoch": 4.711839166046166, "grad_norm": 1.9129005124176943, "kl": 0.28662109375, "learning_rate": 5.535811608034824e-07, "loss": 0.017637744545936584, "memory(GiB)": 112.53, "reward": 1.729166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.729166679084301, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3162, "train_speed(iter/s)": 0.027629 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.25, "completions/mean_length": 95.60416793823242, "completions/min_length": 29.0, "epoch": 4.713328369322412, "grad_norm": 0.003976545108885511, "kl": 0.29541015625, "learning_rate": 5.533460221844659e-07, "loss": 0.0002950101043097675, "memory(GiB)": 112.53, "reward": 1.5000000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3163, "train_speed(iter/s)": 0.027627 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.0, "completions/mean_length": 92.10416984558105, "completions/min_length": 37.5, "epoch": 4.71481757259866, "grad_norm": 0.0036422083860003095, "kl": 0.29345703125, "learning_rate": 5.531108716309547e-07, "loss": 0.00029345921939238906, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3164, "train_speed(iter/s)": 0.02763 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 236.5, "completions/mean_length": 95.05208587646484, "completions/min_length": 41.5, "epoch": 4.716306775874907, "grad_norm": 0.0043486896256026905, "kl": 0.3017578125, "learning_rate": 5.528757091955562e-07, "loss": 0.00030187610536813736, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3165, "train_speed(iter/s)": 0.027626 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 272.0, "completions/mean_length": 103.53125190734863, "completions/min_length": 34.0, "epoch": 4.717795979151154, "grad_norm": 0.004310748945990693, "kl": 0.28564453125, "learning_rate": 5.526405349308807e-07, "loss": 0.00028548113186843693, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3166, "train_speed(iter/s)": 0.027621 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.75, "completions/mean_length": 94.87500190734863, "completions/min_length": 44.25, "epoch": 4.719285182427401, "grad_norm": 0.004197058420328614, "kl": 0.30615234375, "learning_rate": 5.524053488895413e-07, "loss": 0.0003056568093597889, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3167, "train_speed(iter/s)": 0.027619 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 272.5, "completions/mean_length": 96.01041793823242, "completions/min_length": 41.5, "epoch": 4.720774385703649, "grad_norm": 0.004718647095628165, "kl": 0.30419921875, "learning_rate": 5.521701511241532e-07, "loss": 0.00030400132527574897, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3168, "train_speed(iter/s)": 0.027614 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 282.0, "completions/mean_length": 105.60416984558105, "completions/min_length": 43.5, "epoch": 4.722263588979896, "grad_norm": 0.8117561526824927, "kl": 0.396484375, "learning_rate": 5.519349416873346e-07, "loss": 0.01997162401676178, "memory(GiB)": 112.53, "reward": 1.822916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3169, "train_speed(iter/s)": 0.027609 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 265.0, "completions/mean_length": 97.70833396911621, "completions/min_length": 37.75, "epoch": 4.723752792256143, "grad_norm": 0.7542394792468408, "kl": 0.31982421875, "learning_rate": 5.51699720631706e-07, "loss": 0.0014876986388117075, "memory(GiB)": 112.53, "reward": 1.6875000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000149011612, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3170, "train_speed(iter/s)": 0.027607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.5, "completions/mean_length": 96.04166793823242, "completions/min_length": 40.75, "epoch": 4.72524199553239, "grad_norm": 0.004578855938998259, "kl": 0.2802734375, "learning_rate": 5.514644880098911e-07, "loss": 0.0002802886883728206, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3171, "train_speed(iter/s)": 0.027607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.25, "completions/mean_length": 89.11458587646484, "completions/min_length": 35.25, "epoch": 4.726731198808637, "grad_norm": 0.004216063565978387, "kl": 0.29443359375, "learning_rate": 5.512292438745155e-07, "loss": 0.0002948221517726779, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3172, "train_speed(iter/s)": 0.027607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 235.5, "completions/mean_length": 105.56250381469727, "completions/min_length": 36.75, "epoch": 4.728220402084885, "grad_norm": 0.004780376314991864, "kl": 0.2880859375, "learning_rate": 5.509939882782077e-07, "loss": 0.0002882498665712774, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3173, "train_speed(iter/s)": 0.027608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.75, "completions/mean_length": 90.11458396911621, "completions/min_length": 33.25, "epoch": 4.729709605361132, "grad_norm": 0.00472329186349181, "kl": 0.302734375, "learning_rate": 5.507587212735989e-07, "loss": 0.0003029662184417248, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3174, "train_speed(iter/s)": 0.02761 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.0, "completions/mean_length": 96.06250190734863, "completions/min_length": 38.25, "epoch": 4.731198808637379, "grad_norm": 1.009587096774714, "kl": 0.3017578125, "learning_rate": 5.505234429133226e-07, "loss": 0.010584674775600433, "memory(GiB)": 112.53, "reward": 1.4270833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.4270833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3175, "train_speed(iter/s)": 0.027611 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.75, "completions/mean_length": 87.10416984558105, "completions/min_length": 40.75, "epoch": 4.732688011913626, "grad_norm": 1.2637037606613475, "kl": 0.3203125, "learning_rate": 5.502881532500149e-07, "loss": 0.0012183691142126918, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.1942163035273552, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3176, "train_speed(iter/s)": 0.027607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.75, "completions/mean_length": 87.62500190734863, "completions/min_length": 36.0, "epoch": 4.734177215189874, "grad_norm": 0.004127694321887035, "kl": 0.314453125, "learning_rate": 5.500528523363146e-07, "loss": 0.00031455562566407025, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3177, "train_speed(iter/s)": 0.027603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 266.5, "completions/mean_length": 105.15625381469727, "completions/min_length": 44.75, "epoch": 4.735666418466121, "grad_norm": 0.004744108911759179, "kl": 0.27197265625, "learning_rate": 5.498175402248626e-07, "loss": 0.0002718919422477484, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3178, "train_speed(iter/s)": 0.027603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.75, "completions/mean_length": 90.55208396911621, "completions/min_length": 47.5, "epoch": 4.737155621742367, "grad_norm": 0.004653820447366807, "kl": 0.279296875, "learning_rate": 5.495822169683028e-07, "loss": 0.0002792346349451691, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3179, "train_speed(iter/s)": 0.027603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.0, "completions/mean_length": 91.69792175292969, "completions/min_length": 43.0, "epoch": 4.738644825018615, "grad_norm": 0.004259206900825568, "kl": 0.30322265625, "learning_rate": 5.493468826192816e-07, "loss": 0.00030262977816164494, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3180, "train_speed(iter/s)": 0.027604 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.25, "completions/mean_length": 84.65625381469727, "completions/min_length": 45.75, "epoch": 4.740134028294863, "grad_norm": 0.003955857844303894, "kl": 0.30224609375, "learning_rate": 5.491115372304472e-07, "loss": 0.0003024415345862508, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3181, "train_speed(iter/s)": 0.027606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 220.25, "completions/mean_length": 92.85416793823242, "completions/min_length": 47.75, "epoch": 4.7416232315711095, "grad_norm": 0.004515737333188055, "kl": 0.31396484375, "learning_rate": 5.488761808544509e-07, "loss": 0.0003138421452604234, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3182, "train_speed(iter/s)": 0.027606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 240.25, "completions/mean_length": 101.43750190734863, "completions/min_length": 38.0, "epoch": 4.743112434847356, "grad_norm": 1.04946009022951, "kl": 0.270751953125, "learning_rate": 5.486408135439464e-07, "loss": -0.00783338863402605, "memory(GiB)": 112.53, "reward": 1.9479166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.9479166716337204, "rewards/CineAccuracyORM/std": 0.1037127822637558, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3183, "train_speed(iter/s)": 0.027606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 232.25, "completions/mean_length": 92.87500381469727, "completions/min_length": 37.0, "epoch": 4.744601638123604, "grad_norm": 1.5649075665405243, "kl": 0.30615234375, "learning_rate": 5.484054353515895e-07, "loss": 0.01093986164778471, "memory(GiB)": 112.53, "reward": 1.822916716337204, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.30704472959041595, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3184, "train_speed(iter/s)": 0.027604 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 138.0, "completions/mean_length": 83.06250381469727, "completions/min_length": 45.5, "epoch": 4.746090841399851, "grad_norm": 0.6239598744413947, "kl": 0.31787109375, "learning_rate": 5.48170046330039e-07, "loss": 0.013351254165172577, "memory(GiB)": 112.53, "reward": 1.7708333432674408, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333358168602, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3185, "train_speed(iter/s)": 0.027607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.0, "completions/mean_length": 86.20833396911621, "completions/min_length": 41.75, "epoch": 4.7475800446760985, "grad_norm": 1.4880532622733813, "kl": 0.2978515625, "learning_rate": 5.479346465319555e-07, "loss": -0.006748414598405361, "memory(GiB)": 112.53, "reward": 1.8125000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3186, "train_speed(iter/s)": 0.027607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 208.25, "completions/mean_length": 98.41666984558105, "completions/min_length": 42.75, "epoch": 4.749069247952345, "grad_norm": 1.0161935533376218, "kl": 0.2822265625, "learning_rate": 5.476992360100024e-07, "loss": -0.005748086608946323, "memory(GiB)": 112.53, "reward": 1.541666716337204, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.5416666865348816, "rewards/CineAccuracyORM/std": 0.45633094012737274, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3187, "train_speed(iter/s)": 0.027608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.0, "completions/mean_length": 85.41666984558105, "completions/min_length": 47.5, "epoch": 4.750558451228593, "grad_norm": 1.105709965451852, "kl": 0.3037109375, "learning_rate": 5.474638148168455e-07, "loss": -0.02143096923828125, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833507180214, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3188, "train_speed(iter/s)": 0.027604 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.5, "completions/mean_length": 94.83333778381348, "completions/min_length": 39.25, "epoch": 4.75204765450484, "grad_norm": 0.7741078326625457, "kl": 0.30322265625, "learning_rate": 5.472283830051527e-07, "loss": 0.0009081304306164384, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3189, "train_speed(iter/s)": 0.027604 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 195.25, "completions/mean_length": 91.31250190734863, "completions/min_length": 45.75, "epoch": 4.7535368577810875, "grad_norm": 1.853306656653699, "kl": 0.30224609375, "learning_rate": 5.469929406275947e-07, "loss": 0.007941586896777153, "memory(GiB)": 112.53, "reward": 1.8125000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3190, "train_speed(iter/s)": 0.027605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.75, "completions/mean_length": 90.01041984558105, "completions/min_length": 39.75, "epoch": 4.755026061057334, "grad_norm": 0.9913962899976855, "kl": 0.296875, "learning_rate": 5.467574877368441e-07, "loss": 0.001532522845081985, "memory(GiB)": 112.53, "reward": 1.7812500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7812500074505806, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3191, "train_speed(iter/s)": 0.027604 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.75, "completions/mean_length": 95.68750190734863, "completions/min_length": 42.0, "epoch": 4.756515264333581, "grad_norm": 1.113320416003822, "kl": 0.29345703125, "learning_rate": 5.465220243855761e-07, "loss": 0.033921945840120316, "memory(GiB)": 112.53, "reward": 1.5625000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.5625000223517418, "rewards/CineAccuracyORM/std": 0.4870600998401642, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3192, "train_speed(iter/s)": 0.027606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.75, "completions/mean_length": 94.05208587646484, "completions/min_length": 37.75, "epoch": 4.758004467609829, "grad_norm": 0.00426904540141878, "kl": 0.299560546875, "learning_rate": 5.462865506264684e-07, "loss": 0.00030011084163561463, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3193, "train_speed(iter/s)": 0.027609 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 204.5, "completions/mean_length": 91.60416984558105, "completions/min_length": 41.25, "epoch": 4.759493670886076, "grad_norm": 0.02824468171299836, "kl": 0.30615234375, "learning_rate": 5.460510665122007e-07, "loss": 0.00030605224310420454, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3194, "train_speed(iter/s)": 0.027612 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.25, "completions/mean_length": 89.23958587646484, "completions/min_length": 43.0, "epoch": 4.760982874162323, "grad_norm": 0.005693448856266036, "kl": 0.314453125, "learning_rate": 5.458155720954553e-07, "loss": 0.00031430093804374337, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3195, "train_speed(iter/s)": 0.027615 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.25, "completions/mean_length": 98.47916984558105, "completions/min_length": 44.75, "epoch": 4.76247207743857, "grad_norm": 0.5377192266217263, "kl": 0.264892578125, "learning_rate": 5.455800674289164e-07, "loss": -0.016825877130031586, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833507180214, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3196, "train_speed(iter/s)": 0.027613 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.75, "completions/mean_length": 97.94792175292969, "completions/min_length": 43.75, "epoch": 4.763961280714818, "grad_norm": 0.004652618052517972, "kl": 0.28076171875, "learning_rate": 5.45344552565271e-07, "loss": 0.0002801434602588415, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3197, "train_speed(iter/s)": 0.027616 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 272.5, "completions/mean_length": 104.23958587646484, "completions/min_length": 42.25, "epoch": 4.7654504839910645, "grad_norm": 2.2582577947371654, "kl": 1.013671875, "learning_rate": 5.451090275572081e-07, "loss": -0.001137480023317039, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3198, "train_speed(iter/s)": 0.027613 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.25, "completions/mean_length": 93.59375381469727, "completions/min_length": 41.75, "epoch": 4.766939687267312, "grad_norm": 0.7156128183099544, "kl": 0.2861328125, "learning_rate": 5.448734924574189e-07, "loss": 0.013128682971000671, "memory(GiB)": 112.53, "reward": 1.5937500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500223517418, "rewards/CineAccuracyORM/std": 0.47723397612571716, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3199, "train_speed(iter/s)": 0.027616 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.75, "completions/mean_length": 102.65625190734863, "completions/min_length": 38.0, "epoch": 4.768428890543559, "grad_norm": 1.1012437233506014, "kl": 0.32861328125, "learning_rate": 5.446379473185971e-07, "loss": 0.0027511059306561947, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833507180214, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3200, "train_speed(iter/s)": 0.027615 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 260.5, "completions/mean_length": 101.85417175292969, "completions/min_length": 38.75, "epoch": 4.769918093819807, "grad_norm": 1.0830195923492547, "kl": 0.27685546875, "learning_rate": 5.444023921934386e-07, "loss": 0.009630306623876095, "memory(GiB)": 112.53, "reward": 1.7187500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500074505806, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3201, "train_speed(iter/s)": 0.027612 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.0, "completions/mean_length": 97.35416984558105, "completions/min_length": 41.5, "epoch": 4.7714072970960535, "grad_norm": 0.8501829412997484, "kl": 0.2919921875, "learning_rate": 5.441668271346412e-07, "loss": 0.039997030049562454, "memory(GiB)": 112.53, "reward": 1.8125000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3202, "train_speed(iter/s)": 0.02761 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 233.0, "completions/mean_length": 95.97916984558105, "completions/min_length": 39.0, "epoch": 4.772896500372301, "grad_norm": 0.00518306500986162, "kl": 0.286865234375, "learning_rate": 5.439312521949053e-07, "loss": 0.00028675998328253627, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3203, "train_speed(iter/s)": 0.027608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 214.75, "completions/mean_length": 99.40625190734863, "completions/min_length": 50.0, "epoch": 4.774385703648548, "grad_norm": 0.005782220376250764, "kl": 0.283203125, "learning_rate": 5.436956674269333e-07, "loss": 0.00028331155772320926, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3204, "train_speed(iter/s)": 0.027607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.75, "completions/mean_length": 89.68750190734863, "completions/min_length": 42.75, "epoch": 4.775874906924795, "grad_norm": 0.004683285742096426, "kl": 0.3203125, "learning_rate": 5.434600728834302e-07, "loss": 0.0003199205966666341, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3205, "train_speed(iter/s)": 0.027606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 214.25, "completions/mean_length": 92.65625190734863, "completions/min_length": 44.25, "epoch": 4.7773641102010425, "grad_norm": 1.9203356059172056, "kl": 0.2939453125, "learning_rate": 5.432244686171024e-07, "loss": 0.0015131640248000622, "memory(GiB)": 112.53, "reward": 1.8645833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8645833432674408, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3206, "train_speed(iter/s)": 0.027604 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 246.5, "completions/mean_length": 97.80208396911621, "completions/min_length": 39.5, "epoch": 4.77885331347729, "grad_norm": 0.0043039401402603355, "kl": 0.29541015625, "learning_rate": 5.429888546806595e-07, "loss": 0.0002950133930426091, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3207, "train_speed(iter/s)": 0.0276 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.5, "completions/mean_length": 97.87500190734863, "completions/min_length": 43.5, "epoch": 4.780342516753537, "grad_norm": 0.004449729236847841, "kl": 0.28173828125, "learning_rate": 5.427532311268121e-07, "loss": 0.00028162228409200907, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3208, "train_speed(iter/s)": 0.027599 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.5, "completions/mean_length": 90.18750381469727, "completions/min_length": 32.0, "epoch": 4.781831720029784, "grad_norm": 1.6731282664707585, "kl": 0.30517578125, "learning_rate": 5.425175980082739e-07, "loss": -0.0027269728016108274, "memory(GiB)": 112.53, "reward": 1.84375, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.84375, "rewards/CineAccuracyORM/std": 0.12363383919000626, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3209, "train_speed(iter/s)": 0.0276 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.25, "completions/mean_length": 91.86458587646484, "completions/min_length": 43.5, "epoch": 4.783320923306031, "grad_norm": 0.004445668345894657, "kl": 0.2998046875, "learning_rate": 5.422819553777603e-07, "loss": 0.00030005909502506256, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3210, "train_speed(iter/s)": 0.027602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.0, "completions/mean_length": 96.71875381469727, "completions/min_length": 42.75, "epoch": 4.784810126582278, "grad_norm": 0.004789000185810356, "kl": 0.2958984375, "learning_rate": 5.420463032879891e-07, "loss": 0.00029579660622403026, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3211, "train_speed(iter/s)": 0.0276 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.75, "completions/mean_length": 84.96875381469727, "completions/min_length": 32.25, "epoch": 4.786299329858526, "grad_norm": 0.004028052080040151, "kl": 0.310546875, "learning_rate": 5.418106417916799e-07, "loss": 0.00031047442462295294, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3212, "train_speed(iter/s)": 0.027601 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.0, "completions/mean_length": 98.18750381469727, "completions/min_length": 51.0, "epoch": 4.787788533134773, "grad_norm": 0.004667925005870426, "kl": 0.2880859375, "learning_rate": 5.415749709415546e-07, "loss": 0.0002879882522393018, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3213, "train_speed(iter/s)": 0.027602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.75, "completions/mean_length": 87.69791793823242, "completions/min_length": 40.75, "epoch": 4.78927773641102, "grad_norm": 0.005375406783337542, "kl": 0.30517578125, "learning_rate": 5.413392907903369e-07, "loss": 0.0003053680120501667, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3214, "train_speed(iter/s)": 0.027605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.0, "completions/mean_length": 87.03125190734863, "completions/min_length": 41.5, "epoch": 4.790766939687267, "grad_norm": 0.004621618032338772, "kl": 0.28515625, "learning_rate": 5.411036013907533e-07, "loss": 0.00028500391636043787, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3215, "train_speed(iter/s)": 0.027608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.75, "completions/mean_length": 90.52083587646484, "completions/min_length": 40.0, "epoch": 4.792256142963515, "grad_norm": 0.004788920129890738, "kl": 0.3134765625, "learning_rate": 5.408679027955314e-07, "loss": 0.00031364566530101, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3216, "train_speed(iter/s)": 0.027604 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 265.25, "completions/mean_length": 100.34375190734863, "completions/min_length": 44.25, "epoch": 4.793745346239762, "grad_norm": 0.005583785782934857, "kl": 0.30517578125, "learning_rate": 5.406321950574017e-07, "loss": 0.0003053427499253303, "memory(GiB)": 112.53, "reward": 1.5000000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000074505806, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3217, "train_speed(iter/s)": 0.027598 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.25, "completions/mean_length": 92.86458587646484, "completions/min_length": 41.5, "epoch": 4.7952345495160085, "grad_norm": 1.7932857505233928, "kl": 0.2880859375, "learning_rate": 5.403964782290961e-07, "loss": -0.014348732307553291, "memory(GiB)": 112.53, "reward": 1.760416716337204, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.7604166865348816, "rewards/CineAccuracyORM/std": 0.2973194234073162, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3218, "train_speed(iter/s)": 0.027601 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.25, "completions/mean_length": 91.06250381469727, "completions/min_length": 36.5, "epoch": 4.796723752792256, "grad_norm": 0.004839600669481329, "kl": 0.306640625, "learning_rate": 5.401607523633493e-07, "loss": 0.00030615448486059904, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3219, "train_speed(iter/s)": 0.0276 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.0, "completions/mean_length": 90.80208587646484, "completions/min_length": 45.0, "epoch": 4.798212956068504, "grad_norm": 0.0052907957579298006, "kl": 0.30029296875, "learning_rate": 5.39925017512897e-07, "loss": 0.0002997168921865523, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3220, "train_speed(iter/s)": 0.027596 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 199.5, "completions/mean_length": 84.35416984558105, "completions/min_length": 38.25, "epoch": 4.799702159344751, "grad_norm": 0.004293505857919849, "kl": 0.32666015625, "learning_rate": 5.396892737304778e-07, "loss": 0.00032603408908471465, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3221, "train_speed(iter/s)": 0.027594 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.0, "completions/mean_length": 93.71875381469727, "completions/min_length": 43.25, "epoch": 4.801191362620997, "grad_norm": 0.9849633463577454, "kl": 0.2939453125, "learning_rate": 5.394535210688316e-07, "loss": 0.007287831977009773, "memory(GiB)": 112.53, "reward": 1.6875000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000074505806, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3222, "train_speed(iter/s)": 0.027597 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.25, "completions/mean_length": 89.12500190734863, "completions/min_length": 43.75, "epoch": 4.802680565897245, "grad_norm": 0.005033691349502812, "kl": 0.2900390625, "learning_rate": 5.392177595807011e-07, "loss": 0.00028949847910553217, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3223, "train_speed(iter/s)": 0.027595 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.75, "completions/mean_length": 88.03125381469727, "completions/min_length": 38.5, "epoch": 4.804169769173492, "grad_norm": 0.7406132145042378, "kl": 0.3056640625, "learning_rate": 5.389819893188303e-07, "loss": 0.008326543495059013, "memory(GiB)": 112.53, "reward": 1.822916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3224, "train_speed(iter/s)": 0.027596 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.5, "completions/mean_length": 82.32291984558105, "completions/min_length": 45.0, "epoch": 4.80565897244974, "grad_norm": 0.005353806076486664, "kl": 0.296875, "learning_rate": 5.387462103359655e-07, "loss": 0.00029587437165901065, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3225, "train_speed(iter/s)": 0.027599 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.5, "completions/mean_length": 85.56250190734863, "completions/min_length": 44.5, "epoch": 4.807148175725986, "grad_norm": 0.0045992382444739206, "kl": 0.318359375, "learning_rate": 5.385104226848547e-07, "loss": 0.0003188624687027186, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3226, "train_speed(iter/s)": 0.0276 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.0, "completions/mean_length": 89.76041793823242, "completions/min_length": 42.0, "epoch": 4.808637379002234, "grad_norm": 0.0049158644687885625, "kl": 0.30224609375, "learning_rate": 5.38274626418248e-07, "loss": 0.0003020649601239711, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3227, "train_speed(iter/s)": 0.027602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.0, "completions/mean_length": 92.72916984558105, "completions/min_length": 38.5, "epoch": 4.810126582278481, "grad_norm": 0.004260180709269798, "kl": 0.306640625, "learning_rate": 5.380388215888971e-07, "loss": 0.00030659596086479723, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3228, "train_speed(iter/s)": 0.027601 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.25, "completions/mean_length": 88.27083587646484, "completions/min_length": 42.5, "epoch": 4.811615785554729, "grad_norm": 0.7767655769479248, "kl": 0.39501953125, "learning_rate": 5.378030082495564e-07, "loss": 0.04013853892683983, "memory(GiB)": 112.53, "reward": 1.822916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3229, "train_speed(iter/s)": 0.027598 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 181.5, "completions/mean_length": 83.95833778381348, "completions/min_length": 35.25, "epoch": 4.813104988830975, "grad_norm": 0.7539540439607534, "kl": 0.31298828125, "learning_rate": 5.375671864529816e-07, "loss": -0.0027217434253543615, "memory(GiB)": 112.53, "reward": 1.6145834028720856, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6145833507180214, "rewards/CineAccuracyORM/std": 0.464870348572731, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3230, "train_speed(iter/s)": 0.027596 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.25, "completions/mean_length": 83.62500190734863, "completions/min_length": 40.5, "epoch": 4.814594192107222, "grad_norm": 1.8619017267343072, "kl": 0.306640625, "learning_rate": 5.373313562519304e-07, "loss": 0.005350043997168541, "memory(GiB)": 112.53, "reward": 1.604166716337204, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.604166679084301, "rewards/CineAccuracyORM/std": 0.49164988845586777, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3231, "train_speed(iter/s)": 0.027599 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.5, "completions/mean_length": 89.00000190734863, "completions/min_length": 40.0, "epoch": 4.81608339538347, "grad_norm": 0.049359319333256574, "kl": 0.33935546875, "learning_rate": 5.370955176991623e-07, "loss": 0.00033960150904022157, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3232, "train_speed(iter/s)": 0.027593 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.5, "completions/mean_length": 82.09375190734863, "completions/min_length": 44.5, "epoch": 4.8175725986597175, "grad_norm": 0.0046781310692430985, "kl": 0.318359375, "learning_rate": 5.368596708474387e-07, "loss": 0.00031849148217588663, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3233, "train_speed(iter/s)": 0.027593 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.0, "completions/mean_length": 85.97916984558105, "completions/min_length": 45.25, "epoch": 4.819061801935964, "grad_norm": 0.0049668989899528905, "kl": 0.326171875, "learning_rate": 5.36623815749523e-07, "loss": 0.0003257913049310446, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3234, "train_speed(iter/s)": 0.027594 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.25, "completions/mean_length": 88.86458396911621, "completions/min_length": 42.5, "epoch": 4.820551005212211, "grad_norm": 1.7408283023466902, "kl": 0.314453125, "learning_rate": 5.363879524581802e-07, "loss": 0.0006062883767299354, "memory(GiB)": 112.53, "reward": 1.7187500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500149011612, "rewards/CineAccuracyORM/std": 0.4456155598163605, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3235, "train_speed(iter/s)": 0.027595 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 181.75, "completions/mean_length": 85.81250190734863, "completions/min_length": 45.0, "epoch": 4.822040208488459, "grad_norm": 1.0517337202362362, "kl": 0.3046875, "learning_rate": 5.361520810261778e-07, "loss": -0.008340410888195038, "memory(GiB)": 112.53, "reward": 1.822916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3236, "train_speed(iter/s)": 0.027593 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.5, "completions/mean_length": 83.41666793823242, "completions/min_length": 38.0, "epoch": 4.823529411764706, "grad_norm": 0.004820119829435869, "kl": 0.306640625, "learning_rate": 5.359162015062841e-07, "loss": 0.0003067560028284788, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3237, "train_speed(iter/s)": 0.027596 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.5, "completions/mean_length": 87.86458396911621, "completions/min_length": 38.0, "epoch": 4.825018615040953, "grad_norm": 0.005057534680348827, "kl": 0.32080078125, "learning_rate": 5.356803139512698e-07, "loss": 0.000320744322380051, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3238, "train_speed(iter/s)": 0.027595 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.0, "completions/mean_length": 87.19791793823242, "completions/min_length": 46.5, "epoch": 4.8265078183172, "grad_norm": 0.0051076831459842195, "kl": 0.3193359375, "learning_rate": 5.354444184139076e-07, "loss": 0.0003194566525053233, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3239, "train_speed(iter/s)": 0.027598 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 185.5, "completions/mean_length": 89.58333587646484, "completions/min_length": 40.5, "epoch": 4.827997021593448, "grad_norm": 0.005122684920287251, "kl": 0.30419921875, "learning_rate": 5.352085149469717e-07, "loss": 0.00030324666295200586, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3240, "train_speed(iter/s)": 0.027596 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 186.25, "completions/mean_length": 82.77083587646484, "completions/min_length": 38.5, "epoch": 4.829486224869695, "grad_norm": 1.8162366888087589, "kl": 0.29150390625, "learning_rate": 5.349726036032375e-07, "loss": 9.033138485392556e-05, "memory(GiB)": 112.53, "reward": 1.8541666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3241, "train_speed(iter/s)": 0.027597 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.0, "completions/mean_length": 85.41666793823242, "completions/min_length": 38.75, "epoch": 4.830975428145942, "grad_norm": 0.0047731339814312546, "kl": 0.33740234375, "learning_rate": 5.347366844354833e-07, "loss": 0.0003376053355168551, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3242, "train_speed(iter/s)": 0.027598 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 191.75, "completions/mean_length": 88.39583587646484, "completions/min_length": 38.25, "epoch": 4.832464631422189, "grad_norm": 0.005719550796632346, "kl": 0.31494140625, "learning_rate": 5.345007574964885e-07, "loss": 0.0003153685829602182, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3243, "train_speed(iter/s)": 0.027596 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.0, "completions/mean_length": 90.05208587646484, "completions/min_length": 41.5, "epoch": 4.833953834698436, "grad_norm": 0.005623552746705836, "kl": 0.29638671875, "learning_rate": 5.342648228390343e-07, "loss": 0.00029608880868181586, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3244, "train_speed(iter/s)": 0.027597 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.75, "completions/mean_length": 83.22916793823242, "completions/min_length": 38.75, "epoch": 4.8354430379746836, "grad_norm": 0.004988974661019369, "kl": 0.33740234375, "learning_rate": 5.340288805159036e-07, "loss": 0.00033788324799388647, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3245, "train_speed(iter/s)": 0.027596 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.25, "completions/mean_length": 86.61458587646484, "completions/min_length": 46.75, "epoch": 4.836932241250931, "grad_norm": 1.2119134307158892, "kl": 0.30322265625, "learning_rate": 5.337929305798811e-07, "loss": -0.011678218841552734, "memory(GiB)": 112.53, "reward": 1.6979166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3246, "train_speed(iter/s)": 0.027596 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.0, "completions/mean_length": 80.82291984558105, "completions/min_length": 41.75, "epoch": 4.838421444527178, "grad_norm": 0.008991006795643656, "kl": 0.33251953125, "learning_rate": 5.335569730837532e-07, "loss": 0.00033299956703558564, "memory(GiB)": 112.53, "reward": 1.5833333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3247, "train_speed(iter/s)": 0.0276 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.5, "completions/mean_length": 84.01041984558105, "completions/min_length": 45.75, "epoch": 4.839910647803425, "grad_norm": 0.00531641077733349, "kl": 0.32373046875, "learning_rate": 5.333210080803078e-07, "loss": 0.0003241106460336596, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3248, "train_speed(iter/s)": 0.027603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 223.5, "completions/mean_length": 84.94791984558105, "completions/min_length": 34.75, "epoch": 4.8413998510796725, "grad_norm": 1.728102923817512, "kl": 0.30322265625, "learning_rate": 5.330850356223351e-07, "loss": 0.000303322245599702, "memory(GiB)": 112.53, "reward": 1.7916666865348816, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.24807433784008026, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3249, "train_speed(iter/s)": 0.027601 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.75, "completions/mean_length": 78.08333587646484, "completions/min_length": 37.75, "epoch": 4.842889054355919, "grad_norm": 2.0600850405793265, "kl": 0.349609375, "learning_rate": 5.328490557626261e-07, "loss": 0.010166858322918415, "memory(GiB)": 112.53, "reward": 1.5729166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5729166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3250, "train_speed(iter/s)": 0.027602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.5, "completions/mean_length": 85.32291984558105, "completions/min_length": 40.25, "epoch": 4.844378257632167, "grad_norm": 1.1602953549801447, "kl": 0.34765625, "learning_rate": 5.326130685539739e-07, "loss": 7.23799312254414e-05, "memory(GiB)": 112.53, "reward": 1.7083333730697632, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7083333507180214, "rewards/CineAccuracyORM/std": 0.33594508469104767, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3251, "train_speed(iter/s)": 0.027605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 186.75, "completions/mean_length": 82.33333587646484, "completions/min_length": 43.0, "epoch": 4.845867460908414, "grad_norm": 0.7089166402022221, "kl": 0.32373046875, "learning_rate": 5.323770740491735e-07, "loss": -0.0008404998807236552, "memory(GiB)": 112.53, "reward": 1.822916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3252, "train_speed(iter/s)": 0.027608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.25, "completions/mean_length": 94.09375190734863, "completions/min_length": 46.75, "epoch": 4.8473566641846615, "grad_norm": 0.0047342463423201715, "kl": 0.3017578125, "learning_rate": 5.321410723010209e-07, "loss": 0.00030172031256370246, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3253, "train_speed(iter/s)": 0.027608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.25, "completions/mean_length": 82.86458587646484, "completions/min_length": 41.0, "epoch": 4.848845867460908, "grad_norm": 0.004083147767280807, "kl": 0.30322265625, "learning_rate": 5.319050633623141e-07, "loss": 0.00030321869417093694, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3254, "train_speed(iter/s)": 0.027607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.0, "completions/mean_length": 84.88541984558105, "completions/min_length": 31.0, "epoch": 4.850335070737156, "grad_norm": 2.2782570909945705, "kl": 0.298828125, "learning_rate": 5.316690472858531e-07, "loss": 0.017422756180167198, "memory(GiB)": 112.53, "reward": 1.854166716337204, "reward_std": 0.08311938121914864, "rewards/CineAccuracyORM/mean": 0.8541666865348816, "rewards/CineAccuracyORM/std": 0.28614169359207153, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3255, "train_speed(iter/s)": 0.027608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.5, "completions/mean_length": 87.20833587646484, "completions/min_length": 47.0, "epoch": 4.851824274013403, "grad_norm": 1.2381991565166524, "kl": 0.31787109375, "learning_rate": 5.314330241244387e-07, "loss": 0.0014801195356994867, "memory(GiB)": 112.53, "reward": 1.6458333730697632, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.6458333507180214, "rewards/CineAccuracyORM/std": 0.44130611419677734, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3256, "train_speed(iter/s)": 0.027611 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.25, "completions/mean_length": 91.68750190734863, "completions/min_length": 43.25, "epoch": 4.85331347728965, "grad_norm": 1.0970873232098108, "kl": 0.29931640625, "learning_rate": 5.311969939308736e-07, "loss": -0.0056366268545389175, "memory(GiB)": 112.53, "reward": 1.875, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.1276884824037552, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3257, "train_speed(iter/s)": 0.027612 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.25, "completions/mean_length": 84.21875, "completions/min_length": 37.25, "epoch": 4.854802680565897, "grad_norm": 0.004818123811747069, "kl": 0.3203125, "learning_rate": 5.309609567579624e-07, "loss": 0.0003199958009645343, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3258, "train_speed(iter/s)": 0.027611 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.75, "completions/mean_length": 80.75000190734863, "completions/min_length": 41.0, "epoch": 4.856291883842145, "grad_norm": 0.004345624928741383, "kl": 0.326171875, "learning_rate": 5.307249126585108e-07, "loss": 0.0003263828984927386, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3259, "train_speed(iter/s)": 0.027614 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.75, "completions/mean_length": 84.97916793823242, "completions/min_length": 45.75, "epoch": 4.857781087118392, "grad_norm": 0.004577945883513918, "kl": 0.3076171875, "learning_rate": 5.304888616853264e-07, "loss": 0.000308243470499292, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3260, "train_speed(iter/s)": 0.027612 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.5, "completions/mean_length": 89.42708778381348, "completions/min_length": 43.75, "epoch": 4.8592702903946385, "grad_norm": 0.005004111202103345, "kl": 0.31689453125, "learning_rate": 5.302528038912179e-07, "loss": 0.00031651335302740335, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3261, "train_speed(iter/s)": 0.027612 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.5, "completions/mean_length": 80.55208587646484, "completions/min_length": 42.5, "epoch": 4.860759493670886, "grad_norm": 2.2887421773160987, "kl": 0.31884765625, "learning_rate": 5.300167393289961e-07, "loss": -0.011731646955013275, "memory(GiB)": 112.53, "reward": 1.7812500298023224, "reward_std": 0.07400632463395596, "rewards/CineAccuracyORM/mean": 0.7812500149011612, "rewards/CineAccuracyORM/std": 0.33919306844472885, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3262, "train_speed(iter/s)": 0.027615 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.5, "completions/mean_length": 87.21875381469727, "completions/min_length": 37.5, "epoch": 4.862248696947133, "grad_norm": 0.004562449213432868, "kl": 0.30126953125, "learning_rate": 5.29780668051473e-07, "loss": 0.0003017950220964849, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3263, "train_speed(iter/s)": 0.027616 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.25, "completions/mean_length": 85.66666793823242, "completions/min_length": 37.75, "epoch": 4.863737900223381, "grad_norm": 0.004779791772398305, "kl": 0.30810546875, "learning_rate": 5.295445901114621e-07, "loss": 0.00030847900779917836, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3264, "train_speed(iter/s)": 0.027613 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.0, "completions/mean_length": 77.75000381469727, "completions/min_length": 37.5, "epoch": 4.8652271034996275, "grad_norm": 0.0051490194163426786, "kl": 0.33154296875, "learning_rate": 5.293085055617782e-07, "loss": 0.00033131445525214076, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3265, "train_speed(iter/s)": 0.027608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 251.25, "completions/mean_length": 92.07291984558105, "completions/min_length": 36.25, "epoch": 4.866716306775875, "grad_norm": 0.786912691495125, "kl": 0.29931640625, "learning_rate": 5.290724144552379e-07, "loss": 0.0021553938277065754, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3266, "train_speed(iter/s)": 0.027609 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.25, "completions/mean_length": 80.48958587646484, "completions/min_length": 40.25, "epoch": 4.868205510052122, "grad_norm": 0.004653220921452278, "kl": 0.31591796875, "learning_rate": 5.288363168446592e-07, "loss": 0.0003148912510368973, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3267, "train_speed(iter/s)": 0.027608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.25, "completions/mean_length": 76.40625190734863, "completions/min_length": 36.5, "epoch": 4.86969471332837, "grad_norm": 1.8100854132194035, "kl": 0.328125, "learning_rate": 5.286002127828617e-07, "loss": 0.005083777941763401, "memory(GiB)": 112.53, "reward": 1.729166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7291666865348816, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3268, "train_speed(iter/s)": 0.027609 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.5, "completions/mean_length": 89.58333587646484, "completions/min_length": 40.75, "epoch": 4.8711839166046165, "grad_norm": 1.892110428934653, "kl": 0.31005859375, "learning_rate": 5.283641023226661e-07, "loss": -0.017517518252134323, "memory(GiB)": 112.53, "reward": 1.6458333730697632, "reward_std": 0.12483403459191322, "rewards/CineAccuracyORM/mean": 0.6458333507180214, "rewards/CineAccuracyORM/std": 0.3803912103176117, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3269, "train_speed(iter/s)": 0.027608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 135.0, "completions/mean_length": 74.73958778381348, "completions/min_length": 38.0, "epoch": 4.872673119880863, "grad_norm": 0.004945415354194299, "kl": 0.34326171875, "learning_rate": 5.281279855168945e-07, "loss": 0.0003429385251365602, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3270, "train_speed(iter/s)": 0.027611 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.5, "completions/mean_length": 86.23958778381348, "completions/min_length": 37.5, "epoch": 4.874162323157111, "grad_norm": 0.00509183237146193, "kl": 0.31005859375, "learning_rate": 5.278918624183708e-07, "loss": 0.00030949112260714173, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3271, "train_speed(iter/s)": 0.02761 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 184.75, "completions/mean_length": 85.45833587646484, "completions/min_length": 38.5, "epoch": 4.875651526433359, "grad_norm": 0.004472419058772164, "kl": 0.30908203125, "learning_rate": 5.276557330799203e-07, "loss": 0.00030916338437236845, "memory(GiB)": 112.53, "reward": 1.416666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.416666679084301, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3272, "train_speed(iter/s)": 0.027611 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.5, "completions/mean_length": 82.75000190734863, "completions/min_length": 44.75, "epoch": 4.877140729709605, "grad_norm": 0.004760672531419166, "kl": 0.3193359375, "learning_rate": 5.27419597554369e-07, "loss": 0.0003191200958099216, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3273, "train_speed(iter/s)": 0.027608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.75, "completions/mean_length": 81.64583778381348, "completions/min_length": 37.5, "epoch": 4.878629932985852, "grad_norm": 1.0212525249891913, "kl": 0.32373046875, "learning_rate": 5.271834558945454e-07, "loss": 0.01665688119828701, "memory(GiB)": 112.53, "reward": 1.5520833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.5520833432674408, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3274, "train_speed(iter/s)": 0.027609 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.5, "completions/mean_length": 83.06250190734863, "completions/min_length": 40.25, "epoch": 4.8801191362621, "grad_norm": 1.2383229108967462, "kl": 0.32763671875, "learning_rate": 5.269473081532784e-07, "loss": -0.0016615248750895262, "memory(GiB)": 112.53, "reward": 1.697916716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.697916679084301, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3275, "train_speed(iter/s)": 0.027612 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.25, "completions/mean_length": 86.71875381469727, "completions/min_length": 39.0, "epoch": 4.881608339538347, "grad_norm": 1.2532432784796268, "kl": 0.30859375, "learning_rate": 5.267111543833985e-07, "loss": 0.0014008774887770414, "memory(GiB)": 112.53, "reward": 1.8854166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.20484384894371033, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3276, "train_speed(iter/s)": 0.027611 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.5, "completions/mean_length": 82.86458587646484, "completions/min_length": 48.0, "epoch": 4.883097542814594, "grad_norm": 0.004097491752839433, "kl": 0.32275390625, "learning_rate": 5.264749946377382e-07, "loss": 0.000322294479701668, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3277, "train_speed(iter/s)": 0.027612 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.5, "completions/mean_length": 76.87500381469727, "completions/min_length": 42.25, "epoch": 4.884586746090841, "grad_norm": 1.3094593683122515, "kl": 0.3291015625, "learning_rate": 5.262388289691303e-07, "loss": -0.003950048703700304, "memory(GiB)": 112.53, "reward": 1.8958333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3278, "train_speed(iter/s)": 0.027615 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 318.5, "completions/mean_length": 94.22916793823242, "completions/min_length": 42.75, "epoch": 4.886075949367089, "grad_norm": 0.004645987920726107, "kl": 0.31201171875, "learning_rate": 5.260026574304097e-07, "loss": 0.00031218386720865965, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3279, "train_speed(iter/s)": 0.027615 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.25, "completions/mean_length": 84.10416984558105, "completions/min_length": 43.75, "epoch": 4.887565152643336, "grad_norm": 0.004868058129675058, "kl": 0.3193359375, "learning_rate": 5.257664800744122e-07, "loss": 0.00031926989322528243, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3280, "train_speed(iter/s)": 0.027618 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.25, "completions/mean_length": 88.13541793823242, "completions/min_length": 47.5, "epoch": 4.889054355919583, "grad_norm": 0.005168364865015485, "kl": 0.30810546875, "learning_rate": 5.255302969539752e-07, "loss": 0.00030776378116570413, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3281, "train_speed(iter/s)": 0.027619 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.0, "completions/mean_length": 89.89583396911621, "completions/min_length": 48.0, "epoch": 4.89054355919583, "grad_norm": 2.1320874703325305, "kl": 0.3037109375, "learning_rate": 5.252941081219373e-07, "loss": 0.010771333239972591, "memory(GiB)": 112.53, "reward": 1.8854166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.20484384894371033, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3282, "train_speed(iter/s)": 0.027621 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.0, "completions/mean_length": 81.28125190734863, "completions/min_length": 33.25, "epoch": 4.892032762472077, "grad_norm": 0.00660808120803982, "kl": 0.3251953125, "learning_rate": 5.250579136311383e-07, "loss": 0.0003254552138969302, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3283, "train_speed(iter/s)": 0.02762 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.5, "completions/mean_length": 84.89583587646484, "completions/min_length": 45.0, "epoch": 4.893521965748325, "grad_norm": 2.2291777838563487, "kl": 0.56396484375, "learning_rate": 5.248217135344191e-07, "loss": 0.020499899983406067, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.08908708393573761, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.46363356709480286, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3284, "train_speed(iter/s)": 0.027621 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.75, "completions/mean_length": 87.40625190734863, "completions/min_length": 40.75, "epoch": 4.895011169024572, "grad_norm": 0.8707950744458707, "kl": 0.31640625, "learning_rate": 5.245855078846221e-07, "loss": 0.015520969405770302, "memory(GiB)": 112.53, "reward": 1.8437500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3285, "train_speed(iter/s)": 0.027623 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 220.0, "completions/mean_length": 84.42708396911621, "completions/min_length": 28.5, "epoch": 4.896500372300819, "grad_norm": 0.004820226834160037, "kl": 0.32421875, "learning_rate": 5.243492967345908e-07, "loss": 0.0003238775534555316, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3286, "train_speed(iter/s)": 0.027619 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.5, "completions/mean_length": 86.25000190734863, "completions/min_length": 42.25, "epoch": 4.897989575577066, "grad_norm": 0.005177850767516307, "kl": 0.318359375, "learning_rate": 5.241130801371704e-07, "loss": 0.0003181752981618047, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3287, "train_speed(iter/s)": 0.02762 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.25, "completions/mean_length": 93.30208587646484, "completions/min_length": 45.0, "epoch": 4.899478778853314, "grad_norm": 0.05292486650966646, "kl": 0.32275390625, "learning_rate": 5.238768581452066e-07, "loss": 0.00032327062217518687, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3288, "train_speed(iter/s)": 0.027618 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 233.5, "completions/mean_length": 93.97916793823242, "completions/min_length": 44.0, "epoch": 4.90096798212956, "grad_norm": 0.004692531471199964, "kl": 0.28662109375, "learning_rate": 5.236406308115469e-07, "loss": 0.0002866776776500046, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3289, "train_speed(iter/s)": 0.027618 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.25, "completions/mean_length": 80.61458587646484, "completions/min_length": 40.5, "epoch": 4.902457185405808, "grad_norm": 0.004153994326095003, "kl": 0.3291015625, "learning_rate": 5.234043981890393e-07, "loss": 0.0003289908345323056, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3290, "train_speed(iter/s)": 0.027619 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.0, "completions/mean_length": 78.01041793823242, "completions/min_length": 36.75, "epoch": 4.903946388682055, "grad_norm": 0.004396956157958811, "kl": 0.3310546875, "learning_rate": 5.231681603305338e-07, "loss": 0.0003312372136861086, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3291, "train_speed(iter/s)": 0.027618 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.25, "completions/mean_length": 83.66666793823242, "completions/min_length": 38.75, "epoch": 4.905435591958303, "grad_norm": 0.005408118520610877, "kl": 0.333984375, "learning_rate": 5.22931917288881e-07, "loss": 0.00033380946842953563, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3292, "train_speed(iter/s)": 0.027619 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.25, "completions/mean_length": 90.60416984558105, "completions/min_length": 34.75, "epoch": 4.906924795234549, "grad_norm": 2.3249257977173436, "kl": 0.31689453125, "learning_rate": 5.226956691169331e-07, "loss": -0.005429903045296669, "memory(GiB)": 112.53, "reward": 1.8958333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3293, "train_speed(iter/s)": 0.027615 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.0, "completions/mean_length": 82.23958587646484, "completions/min_length": 43.5, "epoch": 4.908413998510797, "grad_norm": 1.089420638135819, "kl": 0.37158203125, "learning_rate": 5.224594158675429e-07, "loss": 0.003160892054438591, "memory(GiB)": 112.53, "reward": 1.8020833432674408, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.20484384894371033, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3294, "train_speed(iter/s)": 0.027614 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.5, "completions/mean_length": 84.08333587646484, "completions/min_length": 34.75, "epoch": 4.909903201787044, "grad_norm": 0.004539878581830119, "kl": 0.32421875, "learning_rate": 5.222231575935648e-07, "loss": 0.0003241900121793151, "memory(GiB)": 112.53, "reward": 1.5000000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000074505806, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3295, "train_speed(iter/s)": 0.027613 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.5, "completions/mean_length": 88.32291793823242, "completions/min_length": 46.25, "epoch": 4.911392405063291, "grad_norm": 0.005003364402780943, "kl": 0.30615234375, "learning_rate": 5.219868943478541e-07, "loss": 0.00030593914561904967, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3296, "train_speed(iter/s)": 0.027616 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.5, "completions/mean_length": 83.52083587646484, "completions/min_length": 43.5, "epoch": 4.912881608339538, "grad_norm": 0.005786442826440041, "kl": 0.31103515625, "learning_rate": 5.217506261832674e-07, "loss": 0.00031087559182196856, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3297, "train_speed(iter/s)": 0.027616 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 199.25, "completions/mean_length": 88.11458587646484, "completions/min_length": 47.5, "epoch": 4.914370811615786, "grad_norm": 1.609145402233232, "kl": 0.3427734375, "learning_rate": 5.215143531526619e-07, "loss": -0.008033165708184242, "memory(GiB)": 112.53, "reward": 1.8437500298023224, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.2819983549416065, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3298, "train_speed(iter/s)": 0.027619 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.25, "completions/mean_length": 81.21875190734863, "completions/min_length": 36.5, "epoch": 4.915860014892033, "grad_norm": 0.004142770820385492, "kl": 0.31201171875, "learning_rate": 5.212780753088968e-07, "loss": 0.000311446376144886, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3299, "train_speed(iter/s)": 0.027618 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.0, "completions/mean_length": 88.01041984558105, "completions/min_length": 38.5, "epoch": 4.91734921816828, "grad_norm": 0.004604532587126362, "kl": 0.32470703125, "learning_rate": 5.210417927048313e-07, "loss": 0.00032498984364792705, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3300, "train_speed(iter/s)": 0.027619 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.75, "completions/mean_length": 86.34375381469727, "completions/min_length": 47.5, "epoch": 4.918838421444527, "grad_norm": 0.0044144405351033734, "kl": 0.326171875, "learning_rate": 5.208055053933266e-07, "loss": 0.0003257696225773543, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3301, "train_speed(iter/s)": 0.027614 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.0, "completions/mean_length": 90.56250190734863, "completions/min_length": 43.75, "epoch": 4.920327624720774, "grad_norm": 2.6537184720916924, "kl": 0.2880859375, "learning_rate": 5.205692134272444e-07, "loss": 0.008632924407720566, "memory(GiB)": 112.53, "reward": 1.5208334028720856, "reward_std": 0.08311938121914864, "rewards/CineAccuracyORM/mean": 0.5208333469927311, "rewards/CineAccuracyORM/std": 0.4065275490283966, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3302, "train_speed(iter/s)": 0.027615 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.0, "completions/mean_length": 82.82291984558105, "completions/min_length": 38.25, "epoch": 4.921816827997022, "grad_norm": 0.005380086555667878, "kl": 0.3359375, "learning_rate": 5.203329168594478e-07, "loss": 0.0003365136217325926, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3303, "train_speed(iter/s)": 0.027611 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.0, "completions/mean_length": 80.43750190734863, "completions/min_length": 39.75, "epoch": 4.923306031273269, "grad_norm": 0.004684070697962851, "kl": 0.31396484375, "learning_rate": 5.200966157428003e-07, "loss": 0.0003137890889775008, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3304, "train_speed(iter/s)": 0.027611 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 185.0, "completions/mean_length": 96.92708587646484, "completions/min_length": 42.25, "epoch": 4.924795234549516, "grad_norm": 0.9589473195765851, "kl": 0.2978515625, "learning_rate": 5.19860310130167e-07, "loss": 0.0035353051498532295, "memory(GiB)": 112.53, "reward": 1.6250000298023224, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.6250000074505806, "rewards/CineAccuracyORM/std": 0.36846019327640533, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3305, "train_speed(iter/s)": 0.027608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.0, "completions/mean_length": 85.20833587646484, "completions/min_length": 33.25, "epoch": 4.926284437825763, "grad_norm": 0.0055970863137989905, "kl": 0.326171875, "learning_rate": 5.196240000744141e-07, "loss": 0.0003261449746787548, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3306, "train_speed(iter/s)": 0.027608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 145.5, "completions/mean_length": 82.91666984558105, "completions/min_length": 42.5, "epoch": 4.927773641102011, "grad_norm": 0.004883016677249325, "kl": 0.3251953125, "learning_rate": 5.193876856284084e-07, "loss": 0.00032529226155020297, "memory(GiB)": 112.53, "reward": 1.5000000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000074505806, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3307, "train_speed(iter/s)": 0.027607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.75, "completions/mean_length": 85.30208587646484, "completions/min_length": 45.75, "epoch": 4.929262844378258, "grad_norm": 0.9867123746050107, "kl": 0.3076171875, "learning_rate": 5.191513668450177e-07, "loss": 0.006017697509378195, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3308, "train_speed(iter/s)": 0.027608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.0, "completions/mean_length": 88.10416984558105, "completions/min_length": 44.75, "epoch": 4.930752047654504, "grad_norm": 0.00482856143289128, "kl": 0.3115234375, "learning_rate": 5.189150437771111e-07, "loss": 0.0003115274594165385, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3309, "train_speed(iter/s)": 0.027604 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.5, "completions/mean_length": 85.51041984558105, "completions/min_length": 39.0, "epoch": 4.932241250930752, "grad_norm": 2.8494233064125454, "kl": 0.3056640625, "learning_rate": 5.186787164775581e-07, "loss": -0.0002905172877945006, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3310, "train_speed(iter/s)": 0.027605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.25, "completions/mean_length": 83.83333587646484, "completions/min_length": 36.5, "epoch": 4.933730454207, "grad_norm": 1.4731237938764348, "kl": 0.3125, "learning_rate": 5.184423849992298e-07, "loss": -0.00435969652608037, "memory(GiB)": 112.53, "reward": 1.8437500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3311, "train_speed(iter/s)": 0.027608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.5, "completions/mean_length": 101.35416984558105, "completions/min_length": 48.0, "epoch": 4.9352196574832465, "grad_norm": 0.007067324812990022, "kl": 0.2783203125, "learning_rate": 5.182060493949981e-07, "loss": 0.00027790016611106694, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3312, "train_speed(iter/s)": 0.027605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.0, "completions/mean_length": 81.22916984558105, "completions/min_length": 40.75, "epoch": 4.936708860759493, "grad_norm": 0.5757365397522655, "kl": 0.30712890625, "learning_rate": 5.179697097177354e-07, "loss": -0.008477217517793179, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3313, "train_speed(iter/s)": 0.027606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.0, "completions/mean_length": 89.77083396911621, "completions/min_length": 45.25, "epoch": 4.938198064035741, "grad_norm": 0.004016058170531183, "kl": 0.3076171875, "learning_rate": 5.177333660203153e-07, "loss": 0.0003081251052208245, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3314, "train_speed(iter/s)": 0.027604 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.75, "completions/mean_length": 82.50000381469727, "completions/min_length": 38.0, "epoch": 4.939687267311988, "grad_norm": 1.8893702981181348, "kl": 0.29736328125, "learning_rate": 5.17497018355612e-07, "loss": -0.001943016890436411, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3315, "train_speed(iter/s)": 0.0276 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 206.0, "completions/mean_length": 90.00000381469727, "completions/min_length": 40.0, "epoch": 4.9411764705882355, "grad_norm": 0.004505733736807642, "kl": 0.3232421875, "learning_rate": 5.172606667765014e-07, "loss": 0.00032300001475960016, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3316, "train_speed(iter/s)": 0.027596 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.0, "completions/mean_length": 83.20833778381348, "completions/min_length": 42.75, "epoch": 4.942665673864482, "grad_norm": 0.004028318898337701, "kl": 0.31103515625, "learning_rate": 5.170243113358593e-07, "loss": 0.00031141613726504147, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3317, "train_speed(iter/s)": 0.027597 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.5, "completions/mean_length": 89.80208396911621, "completions/min_length": 43.75, "epoch": 4.94415487714073, "grad_norm": 1.7875821778095005, "kl": 2.8095703125, "learning_rate": 5.167879520865632e-07, "loss": 0.0037304041907191277, "memory(GiB)": 112.53, "reward": 1.5520834028720856, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.5520833507180214, "rewards/CineAccuracyORM/std": 0.4884019047021866, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3318, "train_speed(iter/s)": 0.027598 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.0, "completions/mean_length": 87.00000190734863, "completions/min_length": 38.25, "epoch": 4.945644080416977, "grad_norm": 0.6696865046429977, "kl": 0.3193359375, "learning_rate": 5.165515890814907e-07, "loss": 0.0006861285073682666, "memory(GiB)": 112.53, "reward": 1.822916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3319, "train_speed(iter/s)": 0.027597 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.75, "completions/mean_length": 94.35416984558105, "completions/min_length": 32.5, "epoch": 4.9471332836932245, "grad_norm": 0.004513654694103675, "kl": 0.31103515625, "learning_rate": 5.163152223735206e-07, "loss": 0.0003114071150775999, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3320, "train_speed(iter/s)": 0.027595 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.5, "completions/mean_length": 87.07291984558105, "completions/min_length": 41.5, "epoch": 4.948622486969471, "grad_norm": 1.065456427975613, "kl": 0.3291015625, "learning_rate": 5.160788520155326e-07, "loss": 0.0050500500947237015, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3321, "train_speed(iter/s)": 0.027591 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.0, "completions/mean_length": 86.31250190734863, "completions/min_length": 32.0, "epoch": 4.950111690245718, "grad_norm": 1.5205418051459247, "kl": 0.29541015625, "learning_rate": 5.158424780604074e-07, "loss": 0.00200101500377059, "memory(GiB)": 112.53, "reward": 1.7187500298023224, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.7187500074505806, "rewards/CineAccuracyORM/std": 0.3582116588950157, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3322, "train_speed(iter/s)": 0.02759 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 251.75, "completions/mean_length": 92.12500190734863, "completions/min_length": 45.25, "epoch": 4.951600893521966, "grad_norm": 0.004502172046510013, "kl": 0.31640625, "learning_rate": 5.156061005610257e-07, "loss": 0.00031637356732971966, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3323, "train_speed(iter/s)": 0.027589 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.25, "completions/mean_length": 85.76041793823242, "completions/min_length": 39.25, "epoch": 4.953090096798213, "grad_norm": 0.004318304769636577, "kl": 0.3125, "learning_rate": 5.153697195702698e-07, "loss": 0.00031265756115317345, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3324, "train_speed(iter/s)": 0.027589 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 204.0, "completions/mean_length": 91.62500190734863, "completions/min_length": 46.25, "epoch": 4.95457930007446, "grad_norm": 0.004516734148343355, "kl": 0.31494140625, "learning_rate": 5.151333351410229e-07, "loss": 0.00031467582448385656, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3325, "train_speed(iter/s)": 0.027586 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.0, "completions/mean_length": 87.72916984558105, "completions/min_length": 41.0, "epoch": 4.956068503350707, "grad_norm": 1.4576397205229177, "kl": 0.3076171875, "learning_rate": 5.148969473261679e-07, "loss": 0.025444738566875458, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.9166666865348816, "rewards/CineAccuracyORM/std": 0.1671074442565441, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3326, "train_speed(iter/s)": 0.027586 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.5, "completions/mean_length": 88.98958587646484, "completions/min_length": 41.75, "epoch": 4.957557706626955, "grad_norm": 0.005006626750131669, "kl": 0.326171875, "learning_rate": 5.146605561785896e-07, "loss": 0.00032634972012601793, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3327, "train_speed(iter/s)": 0.027585 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.25, "completions/mean_length": 83.88541984558105, "completions/min_length": 36.5, "epoch": 4.9590469099032015, "grad_norm": 0.004702962956354142, "kl": 0.30126953125, "learning_rate": 5.144241617511729e-07, "loss": 0.0003014677786268294, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3328, "train_speed(iter/s)": 0.027587 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 146.0, "completions/mean_length": 80.52083778381348, "completions/min_length": 34.25, "epoch": 4.960536113179449, "grad_norm": 1.1063466101950392, "kl": 0.32763671875, "learning_rate": 5.141877640968037e-07, "loss": 0.01831304468214512, "memory(GiB)": 112.53, "reward": 1.760416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166865348816, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3329, "train_speed(iter/s)": 0.027588 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.5, "completions/mean_length": 82.79166984558105, "completions/min_length": 35.5, "epoch": 4.962025316455696, "grad_norm": 1.184139834732652, "kl": 0.322265625, "learning_rate": 5.139513632683683e-07, "loss": -0.014873288571834564, "memory(GiB)": 112.53, "reward": 1.7812500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7812500074505806, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3330, "train_speed(iter/s)": 0.027584 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 202.75, "completions/mean_length": 85.47916984558105, "completions/min_length": 32.5, "epoch": 4.963514519731944, "grad_norm": 3.055495162156062, "kl": 0.3115234375, "learning_rate": 5.137149593187543e-07, "loss": -0.025907738134264946, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.08908708393573761, "rewards/CineAccuracyORM/mean": 0.5000000186264515, "rewards/CineAccuracyORM/std": 0.4311184585094452, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3331, "train_speed(iter/s)": 0.027584 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.0, "completions/mean_length": 89.42708587646484, "completions/min_length": 37.5, "epoch": 4.9650037230081905, "grad_norm": 2.0063827369817724, "kl": 0.3046875, "learning_rate": 5.134785523008495e-07, "loss": 0.006035043857991695, "memory(GiB)": 112.53, "reward": 1.7187500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500149011612, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3332, "train_speed(iter/s)": 0.027585 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.75, "completions/mean_length": 86.12500190734863, "completions/min_length": 41.75, "epoch": 4.966492926284438, "grad_norm": 0.00443058631902824, "kl": 0.30419921875, "learning_rate": 5.132421422675426e-07, "loss": 0.00030418368987739086, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3333, "train_speed(iter/s)": 0.027586 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.5, "completions/mean_length": 82.59375381469727, "completions/min_length": 42.0, "epoch": 4.967982129560685, "grad_norm": 0.608646474697799, "kl": 0.5576171875, "learning_rate": 5.130057292717227e-07, "loss": -0.007678511552512646, "memory(GiB)": 112.53, "reward": 1.4687500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.4687500149011612, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3334, "train_speed(iter/s)": 0.027589 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.0, "completions/mean_length": 82.83333778381348, "completions/min_length": 36.0, "epoch": 4.969471332836932, "grad_norm": 0.0044610606023725435, "kl": 0.33154296875, "learning_rate": 5.127693133662801e-07, "loss": 0.00033200514735653996, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3335, "train_speed(iter/s)": 0.027591 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 254.0, "completions/mean_length": 88.23958587646484, "completions/min_length": 36.0, "epoch": 4.9709605361131795, "grad_norm": 0.004560345618852232, "kl": 0.33935546875, "learning_rate": 5.12532894604105e-07, "loss": 0.0003390520578250289, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3336, "train_speed(iter/s)": 0.027591 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.5, "completions/mean_length": 89.31250381469727, "completions/min_length": 38.0, "epoch": 4.972449739389427, "grad_norm": 1.0270391694606689, "kl": 0.314697265625, "learning_rate": 5.12296473038089e-07, "loss": -0.011869072914123535, "memory(GiB)": 112.53, "reward": 1.8958333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3337, "train_speed(iter/s)": 0.027594 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.25, "completions/mean_length": 76.66666984558105, "completions/min_length": 33.25, "epoch": 4.973938942665674, "grad_norm": 0.003919564214643674, "kl": 0.34765625, "learning_rate": 5.12060048721124e-07, "loss": 0.00034832124947570264, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3338, "train_speed(iter/s)": 0.02759 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 181.75, "completions/mean_length": 83.64583587646484, "completions/min_length": 42.0, "epoch": 4.975428145941921, "grad_norm": 0.005197884936025335, "kl": 0.291015625, "learning_rate": 5.118236217061022e-07, "loss": 0.0002906002919189632, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3339, "train_speed(iter/s)": 0.027591 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.75, "completions/mean_length": 93.01042175292969, "completions/min_length": 39.75, "epoch": 4.976917349218168, "grad_norm": 0.7512291036827095, "kl": 0.31298828125, "learning_rate": 5.115871920459169e-07, "loss": -9.59140743361786e-05, "memory(GiB)": 112.53, "reward": 1.822916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3340, "train_speed(iter/s)": 0.027589 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.75, "completions/mean_length": 85.56250190734863, "completions/min_length": 30.5, "epoch": 4.978406552494415, "grad_norm": 1.6101812895210217, "kl": 0.32275390625, "learning_rate": 5.11350759793462e-07, "loss": 0.022294437512755394, "memory(GiB)": 112.53, "reward": 1.822916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3341, "train_speed(iter/s)": 0.02759 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.0, "completions/mean_length": 78.15625190734863, "completions/min_length": 35.75, "epoch": 4.979895755770663, "grad_norm": 0.004705659851491046, "kl": 0.34033203125, "learning_rate": 5.111143250016314e-07, "loss": 0.0003407057956792414, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3342, "train_speed(iter/s)": 0.027591 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 225.0, "completions/mean_length": 95.88541793823242, "completions/min_length": 43.0, "epoch": 4.98138495904691, "grad_norm": 0.005172363068867913, "kl": 0.30712890625, "learning_rate": 5.108778877233202e-07, "loss": 0.0003066689823754132, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3343, "train_speed(iter/s)": 0.027589 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.0, "completions/mean_length": 101.94791793823242, "completions/min_length": 35.25, "epoch": 4.982874162323157, "grad_norm": 0.7902965207392653, "kl": 0.28662109375, "learning_rate": 5.106414480114238e-07, "loss": -0.019440297037363052, "memory(GiB)": 112.53, "reward": 1.822916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3344, "train_speed(iter/s)": 0.027591 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.25, "completions/mean_length": 93.76041793823242, "completions/min_length": 47.25, "epoch": 4.984363365599404, "grad_norm": 0.004970865102054804, "kl": 0.31201171875, "learning_rate": 5.104050059188379e-07, "loss": 0.00031204911647364497, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3345, "train_speed(iter/s)": 0.027591 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 179.5, "completions/mean_length": 78.92708587646484, "completions/min_length": 30.5, "epoch": 4.985852568875652, "grad_norm": 2.002015716087034, "kl": 0.31884765625, "learning_rate": 5.101685614984594e-07, "loss": 0.014518311247229576, "memory(GiB)": 112.53, "reward": 1.9270833432674408, "reward_std": 0.0876726359128952, "rewards/CineAccuracyORM/mean": 0.9270833432674408, "rewards/CineAccuracyORM/std": 0.11607640981674194, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3346, "train_speed(iter/s)": 0.027592 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 147.75, "completions/mean_length": 78.29166984558105, "completions/min_length": 30.25, "epoch": 4.987341772151899, "grad_norm": 0.005375462724977163, "kl": 0.3330078125, "learning_rate": 5.09932114803185e-07, "loss": 0.0003329586470499635, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3347, "train_speed(iter/s)": 0.027593 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.5, "completions/mean_length": 86.67708778381348, "completions/min_length": 34.5, "epoch": 4.9888309754281455, "grad_norm": 1.3660740247274454, "kl": 0.32373046875, "learning_rate": 5.096956658859122e-07, "loss": 0.028437282890081406, "memory(GiB)": 112.53, "reward": 1.697916716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.697916679084301, "rewards/CineAccuracyORM/std": 0.34448449313640594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3348, "train_speed(iter/s)": 0.027593 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.25, "completions/mean_length": 91.30208587646484, "completions/min_length": 32.75, "epoch": 4.990320178704393, "grad_norm": 0.005283259745131337, "kl": 0.31298828125, "learning_rate": 5.09459214799539e-07, "loss": 0.00031332604703493416, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3349, "train_speed(iter/s)": 0.027591 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.75, "completions/mean_length": 81.41666984558105, "completions/min_length": 37.25, "epoch": 4.991809381980641, "grad_norm": 0.004046255593629111, "kl": 0.3408203125, "learning_rate": 5.092227615969642e-07, "loss": 0.000340757193043828, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3350, "train_speed(iter/s)": 0.027592 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 238.25, "completions/mean_length": 89.76041793823242, "completions/min_length": 32.25, "epoch": 4.993298585256888, "grad_norm": 0.004724601142982825, "kl": 0.302734375, "learning_rate": 5.089863063310865e-07, "loss": 0.0003025761980097741, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3351, "train_speed(iter/s)": 0.027592 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 252.5, "completions/mean_length": 91.19791793823242, "completions/min_length": 37.0, "epoch": 4.994787788533134, "grad_norm": 0.004510296219575742, "kl": 0.30029296875, "learning_rate": 5.087498490548054e-07, "loss": 0.0002995026297867298, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3352, "train_speed(iter/s)": 0.027594 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 202.75, "completions/mean_length": 83.85416984558105, "completions/min_length": 27.0, "epoch": 4.996276991809382, "grad_norm": 2.667157151146671, "kl": 0.3310546875, "learning_rate": 5.085133898210207e-07, "loss": -0.04074828326702118, "memory(GiB)": 112.53, "reward": 1.6562500298023224, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.6562500074505806, "rewards/CineAccuracyORM/std": 0.33468010276556015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3353, "train_speed(iter/s)": 0.027593 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.0, "completions/mean_length": 79.38541793823242, "completions/min_length": 35.75, "epoch": 4.997766195085629, "grad_norm": 2.003127418443658, "kl": 0.30810546875, "learning_rate": 5.082769286826329e-07, "loss": 0.01324792392551899, "memory(GiB)": 112.53, "reward": 1.7187500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500149011612, "rewards/CineAccuracyORM/std": 0.4456155598163605, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3354, "train_speed(iter/s)": 0.027594 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.75, "completions/mean_length": 90.90625190734863, "completions/min_length": 38.0, "epoch": 4.999255398361877, "grad_norm": 0.004368922382910479, "kl": 0.3134765625, "learning_rate": 5.080404656925425e-07, "loss": 0.00031413882970809937, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3355, "train_speed(iter/s)": 0.027594 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.75, "completions/mean_length": 86.62500381469727, "completions/min_length": 41.0, "epoch": 5.001489203276247, "grad_norm": 1.7958533472280065, "kl": 0.322265625, "learning_rate": 5.078040009036508e-07, "loss": -0.002886231755837798, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.6666666939854622, "rewards/CineAccuracyORM/std": 0.40787915512919426, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3356, "train_speed(iter/s)": 0.027593 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.25, "completions/mean_length": 87.35416793823242, "completions/min_length": 34.5, "epoch": 5.0029784065524945, "grad_norm": 1.4194224379912235, "kl": 0.30029296875, "learning_rate": 5.075675343688593e-07, "loss": -0.013833852484822273, "memory(GiB)": 112.53, "reward": 1.9583333432674408, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.9583333432674408, "rewards/CineAccuracyORM/std": 0.13548902794718742, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3357, "train_speed(iter/s)": 0.027594 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.25, "completions/mean_length": 76.59375, "completions/min_length": 29.5, "epoch": 5.004467609828741, "grad_norm": 0.004388479237886138, "kl": 0.33984375, "learning_rate": 5.0733106614107e-07, "loss": 0.0003395565727259964, "memory(GiB)": 112.53, "reward": 1.416666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.416666679084301, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3358, "train_speed(iter/s)": 0.027595 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.5, "completions/mean_length": 84.03125381469727, "completions/min_length": 28.5, "epoch": 5.005956813104989, "grad_norm": 1.3483955512203885, "kl": 0.30615234375, "learning_rate": 5.070945962731852e-07, "loss": 0.0035791851114481688, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3359, "train_speed(iter/s)": 0.027596 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.5, "completions/mean_length": 83.45833587646484, "completions/min_length": 34.25, "epoch": 5.007446016381236, "grad_norm": 0.003956561175673431, "kl": 0.310546875, "learning_rate": 5.068581248181078e-07, "loss": 0.00031047951779328287, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3360, "train_speed(iter/s)": 0.027594 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.5, "completions/mean_length": 87.61458587646484, "completions/min_length": 27.0, "epoch": 5.0089352196574835, "grad_norm": 0.005715691890615731, "kl": 0.3115234375, "learning_rate": 5.066216518287406e-07, "loss": 0.00031152123119682074, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3361, "train_speed(iter/s)": 0.027594 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.0, "completions/mean_length": 84.38541793823242, "completions/min_length": 33.75, "epoch": 5.01042442293373, "grad_norm": 0.004514939612821738, "kl": 0.31103515625, "learning_rate": 5.063851773579869e-07, "loss": 0.00031072975252754986, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3362, "train_speed(iter/s)": 0.027592 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 202.25, "completions/mean_length": 93.92708778381348, "completions/min_length": 35.25, "epoch": 5.011913626209978, "grad_norm": 0.005020466065001936, "kl": 0.29443359375, "learning_rate": 5.061487014587507e-07, "loss": 0.0002942957216873765, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3363, "train_speed(iter/s)": 0.027592 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.0, "completions/mean_length": 91.03125381469727, "completions/min_length": 37.5, "epoch": 5.013402829486225, "grad_norm": 0.004285505009782408, "kl": 0.31591796875, "learning_rate": 5.059122241839358e-07, "loss": 0.0003162677166983485, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3364, "train_speed(iter/s)": 0.027593 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 184.75, "completions/mean_length": 87.69791984558105, "completions/min_length": 29.5, "epoch": 5.014892032762472, "grad_norm": 0.004931178063437362, "kl": 0.3212890625, "learning_rate": 5.056757455864468e-07, "loss": 0.00032174820080399513, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3365, "train_speed(iter/s)": 0.027592 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.25, "completions/mean_length": 83.45833587646484, "completions/min_length": 32.25, "epoch": 5.016381236038719, "grad_norm": 0.005160304437357826, "kl": 0.310546875, "learning_rate": 5.054392657191883e-07, "loss": 0.00031087425304576755, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3366, "train_speed(iter/s)": 0.027593 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.5, "completions/mean_length": 89.44791984558105, "completions/min_length": 32.25, "epoch": 5.017870439314967, "grad_norm": 0.004726570158083856, "kl": 0.32421875, "learning_rate": 5.052027846350651e-07, "loss": 0.00032491408637724817, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3367, "train_speed(iter/s)": 0.027594 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 250.25, "completions/mean_length": 89.72916984558105, "completions/min_length": 36.5, "epoch": 5.019359642591214, "grad_norm": 0.004623684062528219, "kl": 0.31689453125, "learning_rate": 5.049663023869823e-07, "loss": 0.00031692502670921385, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3368, "train_speed(iter/s)": 0.027592 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.75, "completions/mean_length": 80.98958396911621, "completions/min_length": 39.5, "epoch": 5.0208488458674605, "grad_norm": 0.0037523715197606577, "kl": 0.326171875, "learning_rate": 5.047298190278458e-07, "loss": 0.0003265073464717716, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3369, "train_speed(iter/s)": 0.027595 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.25, "completions/mean_length": 87.77083587646484, "completions/min_length": 36.25, "epoch": 5.022338049143708, "grad_norm": 0.6276726674292885, "kl": 0.55615234375, "learning_rate": 5.044933346105609e-07, "loss": -0.004033383913338184, "memory(GiB)": 112.53, "reward": 1.7083333432674408, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7083333432674408, "rewards/CineAccuracyORM/std": 0.09517337381839752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3370, "train_speed(iter/s)": 0.027591 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.75, "completions/mean_length": 82.76041984558105, "completions/min_length": 34.5, "epoch": 5.023827252419955, "grad_norm": 0.005764114893482811, "kl": 0.31982421875, "learning_rate": 5.042568491880338e-07, "loss": 0.000320086139254272, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3371, "train_speed(iter/s)": 0.027594 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.5, "completions/mean_length": 85.51041984558105, "completions/min_length": 42.25, "epoch": 5.025316455696203, "grad_norm": 0.0036652274368860656, "kl": 0.31103515625, "learning_rate": 5.040203628131705e-07, "loss": 0.0003116966981906444, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3372, "train_speed(iter/s)": 0.027596 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.75, "completions/mean_length": 85.55208587646484, "completions/min_length": 34.5, "epoch": 5.0268056589724495, "grad_norm": 0.9715147324611018, "kl": 0.2939453125, "learning_rate": 5.037838755388774e-07, "loss": 0.005466242786496878, "memory(GiB)": 112.53, "reward": 1.8125000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000074505806, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3373, "train_speed(iter/s)": 0.027596 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.0, "completions/mean_length": 80.88541793823242, "completions/min_length": 35.5, "epoch": 5.028294862248697, "grad_norm": 0.00480576946885726, "kl": 0.31494140625, "learning_rate": 5.035473874180611e-07, "loss": 0.00031445635249838233, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3374, "train_speed(iter/s)": 0.027599 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 231.75, "completions/mean_length": 92.79166984558105, "completions/min_length": 41.5, "epoch": 5.029784065524944, "grad_norm": 0.004464595561456831, "kl": 0.31396484375, "learning_rate": 5.033108985036286e-07, "loss": 0.00031424337066709995, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3375, "train_speed(iter/s)": 0.027598 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.25, "completions/mean_length": 88.08333587646484, "completions/min_length": 35.0, "epoch": 5.031273268801192, "grad_norm": 0.003988481990468294, "kl": 0.326171875, "learning_rate": 5.030744088484866e-07, "loss": 0.0003256734344176948, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3376, "train_speed(iter/s)": 0.027597 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.75, "completions/mean_length": 85.11458587646484, "completions/min_length": 36.5, "epoch": 5.032762472077438, "grad_norm": 0.004488156163793452, "kl": 0.307373046875, "learning_rate": 5.028379185055424e-07, "loss": 0.00030718217021785676, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3377, "train_speed(iter/s)": 0.027598 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 196.5, "completions/mean_length": 93.18750381469727, "completions/min_length": 35.75, "epoch": 5.034251675353686, "grad_norm": 0.004856057454287635, "kl": 0.29443359375, "learning_rate": 5.026014275277031e-07, "loss": 0.00029442631057463586, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3378, "train_speed(iter/s)": 0.0276 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 262.75, "completions/mean_length": 96.94791793823242, "completions/min_length": 41.75, "epoch": 5.035740878629933, "grad_norm": 0.02716990852410447, "kl": 0.321044921875, "learning_rate": 5.023649359678763e-07, "loss": 0.0003206482797395438, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3379, "train_speed(iter/s)": 0.027598 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.75, "completions/mean_length": 86.26041793823242, "completions/min_length": 42.5, "epoch": 5.037230081906181, "grad_norm": 0.0794926850635618, "kl": 0.36083984375, "learning_rate": 5.021284438789693e-07, "loss": 0.0003607876133173704, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3380, "train_speed(iter/s)": 0.027597 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.0, "completions/mean_length": 85.01041984558105, "completions/min_length": 40.25, "epoch": 5.038719285182427, "grad_norm": 1.0836090299514651, "kl": 0.3330078125, "learning_rate": 5.0189195131389e-07, "loss": -0.017217161133885384, "memory(GiB)": 112.53, "reward": 1.6041666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6041666716337204, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3381, "train_speed(iter/s)": 0.0276 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.0, "completions/mean_length": 88.67708587646484, "completions/min_length": 46.75, "epoch": 5.040208488458674, "grad_norm": 0.004441737812318204, "kl": 0.30126953125, "learning_rate": 5.016554583255461e-07, "loss": 0.000300610059639439, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3382, "train_speed(iter/s)": 0.0276 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.75, "completions/mean_length": 87.76041984558105, "completions/min_length": 42.0, "epoch": 5.041697691734922, "grad_norm": 1.0042331501724409, "kl": 0.3271484375, "learning_rate": 5.014189649668456e-07, "loss": -0.019523754715919495, "memory(GiB)": 112.53, "reward": 1.9375, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.11058146506547928, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3383, "train_speed(iter/s)": 0.027598 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 231.25, "completions/mean_length": 90.72916984558105, "completions/min_length": 41.25, "epoch": 5.043186895011169, "grad_norm": 0.005006551719523412, "kl": 0.32666015625, "learning_rate": 5.011824712906962e-07, "loss": 0.0003257699718233198, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3384, "train_speed(iter/s)": 0.027598 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 202.75, "completions/mean_length": 86.25000190734863, "completions/min_length": 40.5, "epoch": 5.044676098287416, "grad_norm": 0.0050816162865835015, "kl": 0.33349609375, "learning_rate": 5.009459773500062e-07, "loss": 0.00033331726444885135, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3385, "train_speed(iter/s)": 0.027598 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.75, "completions/mean_length": 93.37500381469727, "completions/min_length": 44.25, "epoch": 5.046165301563663, "grad_norm": 0.9869217480389478, "kl": 0.2890625, "learning_rate": 5.007094831976831e-07, "loss": -0.006184294354170561, "memory(GiB)": 112.53, "reward": 1.8645833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8645833432674408, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3386, "train_speed(iter/s)": 0.027599 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.0, "completions/mean_length": 87.28125190734863, "completions/min_length": 43.25, "epoch": 5.047654504839911, "grad_norm": 0.9536999663387274, "kl": 0.31396484375, "learning_rate": 5.004729888866357e-07, "loss": 0.0019393835682421923, "memory(GiB)": 112.53, "reward": 1.8541666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3387, "train_speed(iter/s)": 0.027599 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 229.5, "completions/mean_length": 92.44791793823242, "completions/min_length": 50.25, "epoch": 5.049143708116158, "grad_norm": 0.003890730416331402, "kl": 0.27880859375, "learning_rate": 5.002364944697719e-07, "loss": 0.0002786013064906001, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3388, "train_speed(iter/s)": 0.027599 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 232.0, "completions/mean_length": 92.39583587646484, "completions/min_length": 38.75, "epoch": 5.050632911392405, "grad_norm": 0.7156999937408282, "kl": 0.3076171875, "learning_rate": 5e-07, "loss": 0.0020301351323723793, "memory(GiB)": 112.53, "reward": 1.7812500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7812500074505806, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3389, "train_speed(iter/s)": 0.027601 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.75, "completions/mean_length": 90.51041984558105, "completions/min_length": 37.5, "epoch": 5.052122114668652, "grad_norm": 0.004365424484373373, "kl": 0.28564453125, "learning_rate": 4.997635055302279e-07, "loss": 0.00028556742472574115, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3390, "train_speed(iter/s)": 0.027602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.75, "completions/mean_length": 79.26041984558105, "completions/min_length": 32.5, "epoch": 5.0536113179449, "grad_norm": 0.00409157900369002, "kl": 0.3193359375, "learning_rate": 4.995270111133643e-07, "loss": 0.00031960755586624146, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3391, "train_speed(iter/s)": 0.027602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.25, "completions/mean_length": 89.22916793823242, "completions/min_length": 34.75, "epoch": 5.055100521221147, "grad_norm": 0.9920232435297605, "kl": 0.292724609375, "learning_rate": 4.992905168023168e-07, "loss": -0.008752134628593922, "memory(GiB)": 112.53, "reward": 1.7708333432674408, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333358168602, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3392, "train_speed(iter/s)": 0.027602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.75, "completions/mean_length": 93.02083587646484, "completions/min_length": 43.0, "epoch": 5.056589724497394, "grad_norm": 2.294507252853804, "kl": 0.32568359375, "learning_rate": 4.990540226499941e-07, "loss": -0.011337298899888992, "memory(GiB)": 112.53, "reward": 1.8125000298023224, "reward_std": 0.08311938121914864, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.3261406943202019, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3393, "train_speed(iter/s)": 0.027605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.75, "completions/mean_length": 94.48958587646484, "completions/min_length": 42.25, "epoch": 5.058078927773641, "grad_norm": 0.004286025561270475, "kl": 0.286376953125, "learning_rate": 4.988175287093039e-07, "loss": 0.00028683891287073493, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3394, "train_speed(iter/s)": 0.027601 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.0, "completions/mean_length": 91.43750190734863, "completions/min_length": 36.25, "epoch": 5.059568131049888, "grad_norm": 0.003916014288515946, "kl": 0.326171875, "learning_rate": 4.985810350331544e-07, "loss": 0.00032609596382826567, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3395, "train_speed(iter/s)": 0.027602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.0, "completions/mean_length": 89.03125190734863, "completions/min_length": 46.5, "epoch": 5.061057334326136, "grad_norm": 0.0039376260130408874, "kl": 0.2880859375, "learning_rate": 4.983445416744539e-07, "loss": 0.00028812309028580785, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3396, "train_speed(iter/s)": 0.027603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 195.75, "completions/mean_length": 91.36458396911621, "completions/min_length": 45.0, "epoch": 5.062546537602382, "grad_norm": 1.4620944824848747, "kl": 0.30029296875, "learning_rate": 4.981080486861099e-07, "loss": -8.463188714813441e-05, "memory(GiB)": 112.53, "reward": 1.7083333730697632, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7083333432674408, "rewards/CineAccuracyORM/std": 0.36846019327640533, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3397, "train_speed(iter/s)": 0.027601 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.75, "completions/mean_length": 84.21875381469727, "completions/min_length": 45.5, "epoch": 5.06403574087863, "grad_norm": 1.185627018102963, "kl": 0.29296875, "learning_rate": 4.978715561210307e-07, "loss": 0.004966151900589466, "memory(GiB)": 112.53, "reward": 1.6458333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6458333507180214, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3398, "train_speed(iter/s)": 0.027604 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.5, "completions/mean_length": 86.14583587646484, "completions/min_length": 48.0, "epoch": 5.065524944154877, "grad_norm": 0.004900835701399577, "kl": 0.30322265625, "learning_rate": 4.976350640321237e-07, "loss": 0.000303306762361899, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3399, "train_speed(iter/s)": 0.027605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 179.0, "completions/mean_length": 93.52083396911621, "completions/min_length": 42.25, "epoch": 5.0670141474311245, "grad_norm": 0.005275770602979102, "kl": 0.278076171875, "learning_rate": 4.973985724722968e-07, "loss": 0.00027844574651680887, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3400, "train_speed(iter/s)": 0.027606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.75, "completions/mean_length": 86.08333587646484, "completions/min_length": 38.75, "epoch": 5.068503350707371, "grad_norm": 0.004101936688570745, "kl": 0.31640625, "learning_rate": 4.971620814944577e-07, "loss": 0.0003171527059748769, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3401, "train_speed(iter/s)": 0.027607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.5, "completions/mean_length": 94.50000190734863, "completions/min_length": 43.5, "epoch": 5.069992553983619, "grad_norm": 0.003740883683077619, "kl": 0.26806640625, "learning_rate": 4.969255911515134e-07, "loss": 0.00026831423747353256, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3402, "train_speed(iter/s)": 0.027605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.25, "completions/mean_length": 92.58333587646484, "completions/min_length": 42.25, "epoch": 5.071481757259866, "grad_norm": 0.004533576666436002, "kl": 0.29638671875, "learning_rate": 4.966891014963716e-07, "loss": 0.00029688363429158926, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3403, "train_speed(iter/s)": 0.027607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.25, "completions/mean_length": 78.00000190734863, "completions/min_length": 29.5, "epoch": 5.0729709605361135, "grad_norm": 0.003710654316411624, "kl": 0.318359375, "learning_rate": 4.964526125819389e-07, "loss": 0.00031737287645228207, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3404, "train_speed(iter/s)": 0.027608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.0, "completions/mean_length": 87.70833587646484, "completions/min_length": 39.25, "epoch": 5.07446016381236, "grad_norm": 0.004174637948868702, "kl": 0.31005859375, "learning_rate": 4.962161244611227e-07, "loss": 0.0003101057664025575, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3405, "train_speed(iter/s)": 0.027611 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.0, "completions/mean_length": 84.15625190734863, "completions/min_length": 35.25, "epoch": 5.075949367088608, "grad_norm": 0.00480507280855839, "kl": 0.3291015625, "learning_rate": 4.959796371868297e-07, "loss": 0.0003295568167231977, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3406, "train_speed(iter/s)": 0.027612 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 199.75, "completions/mean_length": 90.42708587646484, "completions/min_length": 39.25, "epoch": 5.077438570364855, "grad_norm": 2.2007935954383293, "kl": 0.298828125, "learning_rate": 4.957431508119663e-07, "loss": 0.0017527341842651367, "memory(GiB)": 112.53, "reward": 1.7083333432674408, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.7083333358168602, "rewards/CineAccuracyORM/std": 0.22734662145376205, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3407, "train_speed(iter/s)": 0.027613 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 199.75, "completions/mean_length": 96.78125190734863, "completions/min_length": 36.75, "epoch": 5.078927773641102, "grad_norm": 1.9670197371232092, "kl": 0.29541015625, "learning_rate": 4.955066653894393e-07, "loss": 0.0014385685790330172, "memory(GiB)": 112.53, "reward": 1.729166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.729166679084301, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3408, "train_speed(iter/s)": 0.027611 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 204.5, "completions/mean_length": 94.67708778381348, "completions/min_length": 39.75, "epoch": 5.080416976917349, "grad_norm": 0.0037889851927720057, "kl": 0.291259765625, "learning_rate": 4.952701809721543e-07, "loss": 0.0002909886825364083, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3409, "train_speed(iter/s)": 0.027608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 208.75, "completions/mean_length": 93.51041984558105, "completions/min_length": 43.5, "epoch": 5.081906180193596, "grad_norm": 0.0041332718778071915, "kl": 0.2900390625, "learning_rate": 4.950336976130176e-07, "loss": 0.00029093859484419227, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3410, "train_speed(iter/s)": 0.02761 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 185.25, "completions/mean_length": 86.48958587646484, "completions/min_length": 38.5, "epoch": 5.083395383469844, "grad_norm": 0.0038866076833012743, "kl": 0.302734375, "learning_rate": 4.94797215364935e-07, "loss": 0.0003031829546671361, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3411, "train_speed(iter/s)": 0.027611 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 235.5, "completions/mean_length": 97.36458587646484, "completions/min_length": 33.75, "epoch": 5.084884586746091, "grad_norm": 0.6374122623249557, "kl": 0.37841796875, "learning_rate": 4.945607342808117e-07, "loss": -0.0050055235624313354, "memory(GiB)": 112.53, "reward": 1.8437500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3412, "train_speed(iter/s)": 0.027609 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.5, "completions/mean_length": 87.54166984558105, "completions/min_length": 43.75, "epoch": 5.086373790022338, "grad_norm": 1.4509447696570446, "kl": 0.30126953125, "learning_rate": 4.943242544135532e-07, "loss": -0.0020597553811967373, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3413, "train_speed(iter/s)": 0.027608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.5, "completions/mean_length": 90.62500381469727, "completions/min_length": 38.25, "epoch": 5.087862993298585, "grad_norm": 2.4704991793125277, "kl": 1.99365234375, "learning_rate": 4.940877758160641e-07, "loss": 0.001986087067052722, "memory(GiB)": 112.53, "reward": 1.7083333730697632, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7083333395421505, "rewards/CineAccuracyORM/std": 0.2155592292547226, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3414, "train_speed(iter/s)": 0.027608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.0, "completions/mean_length": 92.79166984558105, "completions/min_length": 40.5, "epoch": 5.089352196574833, "grad_norm": 0.004156427826550248, "kl": 0.31787109375, "learning_rate": 4.938512985412493e-07, "loss": 0.0003180800413247198, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3415, "train_speed(iter/s)": 0.027609 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 194.75, "completions/mean_length": 86.40625190734863, "completions/min_length": 39.5, "epoch": 5.0908413998510795, "grad_norm": 0.0035688846649388406, "kl": 0.3046875, "learning_rate": 4.936148226420132e-07, "loss": 0.0003043710603378713, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3416, "train_speed(iter/s)": 0.027612 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 199.0, "completions/mean_length": 93.26041793823242, "completions/min_length": 44.5, "epoch": 5.092330603127327, "grad_norm": 0.0037855889437892324, "kl": 0.28515625, "learning_rate": 4.933783481712595e-07, "loss": 0.00028547545662149787, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3417, "train_speed(iter/s)": 0.027612 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.75, "completions/mean_length": 95.21875381469727, "completions/min_length": 43.25, "epoch": 5.093819806403574, "grad_norm": 1.3805790494182446, "kl": 0.294921875, "learning_rate": 4.931418751818922e-07, "loss": -0.005012223497033119, "memory(GiB)": 112.53, "reward": 1.6562500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500223517418, "rewards/CineAccuracyORM/std": 0.4121886007487774, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3418, "train_speed(iter/s)": 0.027612 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.75, "completions/mean_length": 95.06250381469727, "completions/min_length": 44.75, "epoch": 5.095309009679822, "grad_norm": 0.004865269620084293, "kl": 0.283203125, "learning_rate": 4.929054037268147e-07, "loss": 0.0002830885350704193, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3419, "train_speed(iter/s)": 0.02761 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.75, "completions/mean_length": 89.38541984558105, "completions/min_length": 44.25, "epoch": 5.0967982129560685, "grad_norm": 0.004256021400159707, "kl": 0.29052734375, "learning_rate": 4.926689338589299e-07, "loss": 0.00029043154790997505, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3420, "train_speed(iter/s)": 0.027609 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 244.75, "completions/mean_length": 98.70833587646484, "completions/min_length": 33.75, "epoch": 5.098287416232315, "grad_norm": 0.0036417639707500386, "kl": 0.2724609375, "learning_rate": 4.924324656311407e-07, "loss": 0.00027245451929047704, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3421, "train_speed(iter/s)": 0.027609 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 214.5, "completions/mean_length": 106.56250381469727, "completions/min_length": 44.0, "epoch": 5.099776619508563, "grad_norm": 0.009373393381658275, "kl": 0.273193359375, "learning_rate": 4.921959990963493e-07, "loss": 0.00027308971039019525, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3422, "train_speed(iter/s)": 0.027606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 268.25, "completions/mean_length": 104.4375, "completions/min_length": 44.25, "epoch": 5.10126582278481, "grad_norm": 1.2188250654455701, "kl": 0.28369140625, "learning_rate": 4.919595343074575e-07, "loss": 0.006249844562262297, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.822916679084301, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3423, "train_speed(iter/s)": 0.027604 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.25, "completions/mean_length": 94.13541984558105, "completions/min_length": 42.25, "epoch": 5.1027550260610575, "grad_norm": 0.005076417174395275, "kl": 0.302734375, "learning_rate": 4.917230713173671e-07, "loss": 0.0003022847813554108, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3424, "train_speed(iter/s)": 0.027604 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.25, "completions/mean_length": 94.79166793823242, "completions/min_length": 45.25, "epoch": 5.104244229337304, "grad_norm": 0.004064551610484512, "kl": 0.29150390625, "learning_rate": 4.914866101789792e-07, "loss": 0.00029102456755936146, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3425, "train_speed(iter/s)": 0.027606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.75, "completions/mean_length": 97.02083587646484, "completions/min_length": 45.75, "epoch": 5.105733432613552, "grad_norm": 1.6102137769344225, "kl": 0.291015625, "learning_rate": 4.912501509451947e-07, "loss": -0.005991206504404545, "memory(GiB)": 112.53, "reward": 1.729166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.729166679084301, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3426, "train_speed(iter/s)": 0.027607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 235.25, "completions/mean_length": 101.29166984558105, "completions/min_length": 42.25, "epoch": 5.107222635889799, "grad_norm": 0.00616538824247387, "kl": 0.2783203125, "learning_rate": 4.910136936689134e-07, "loss": 0.0002782001974992454, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3427, "train_speed(iter/s)": 0.027607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.5, "completions/mean_length": 98.95833587646484, "completions/min_length": 41.75, "epoch": 5.108711839166046, "grad_norm": 0.004478211087331567, "kl": 0.29150390625, "learning_rate": 4.907772384030357e-07, "loss": 0.0002912735508289188, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3428, "train_speed(iter/s)": 0.027606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 201.25, "completions/mean_length": 91.64583396911621, "completions/min_length": 38.25, "epoch": 5.110201042442293, "grad_norm": 0.0045719008657402116, "kl": 0.31591796875, "learning_rate": 4.90540785200461e-07, "loss": 0.00031582434894517064, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3429, "train_speed(iter/s)": 0.027607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.5, "completions/mean_length": 89.11458587646484, "completions/min_length": 43.25, "epoch": 5.111690245718541, "grad_norm": 1.088938509145336, "kl": 0.28564453125, "learning_rate": 4.903043341140879e-07, "loss": 0.013923026621341705, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3430, "train_speed(iter/s)": 0.027607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 199.5, "completions/mean_length": 96.65625190734863, "completions/min_length": 47.0, "epoch": 5.113179448994788, "grad_norm": 0.004572053187839084, "kl": 0.302734375, "learning_rate": 4.900678851968151e-07, "loss": 0.0003022216260433197, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3431, "train_speed(iter/s)": 0.027605 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.75, "completions/mean_length": 99.85416793823242, "completions/min_length": 41.0, "epoch": 5.114668652271035, "grad_norm": 0.004133340566642937, "kl": 0.28759765625, "learning_rate": 4.898314385015407e-07, "loss": 0.0002875011705327779, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3432, "train_speed(iter/s)": 0.027606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 256.5, "completions/mean_length": 102.90625381469727, "completions/min_length": 48.5, "epoch": 5.116157855547282, "grad_norm": 0.004099869363612648, "kl": 0.28271484375, "learning_rate": 4.895949940811619e-07, "loss": 0.00028235107311047614, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3433, "train_speed(iter/s)": 0.027608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.5, "completions/mean_length": 99.44791984558105, "completions/min_length": 47.25, "epoch": 5.117647058823529, "grad_norm": 1.0397142169129572, "kl": 0.29736328125, "learning_rate": 4.893585519885763e-07, "loss": 0.008028032258152962, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.822916679084301, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3434, "train_speed(iter/s)": 0.027608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.0, "completions/mean_length": 99.36458587646484, "completions/min_length": 44.5, "epoch": 5.119136262099777, "grad_norm": 0.45791253351083755, "kl": 0.29150390625, "learning_rate": 4.891221122766798e-07, "loss": 0.005371539853513241, "memory(GiB)": 112.53, "reward": 1.6562500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500149011612, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3435, "train_speed(iter/s)": 0.027608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 267.75, "completions/mean_length": 110.73958587646484, "completions/min_length": 41.0, "epoch": 5.1206254653760235, "grad_norm": 1.5963734226435722, "kl": 0.27978515625, "learning_rate": 4.888856749983686e-07, "loss": 0.007822506129741669, "memory(GiB)": 112.53, "reward": 1.760416716337204, "reward_std": 0.10661446116864681, "rewards/CineAccuracyORM/mean": 0.7604166865348816, "rewards/CineAccuracyORM/std": 0.31256140768527985, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3436, "train_speed(iter/s)": 0.027606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 204.0, "completions/mean_length": 94.46875381469727, "completions/min_length": 44.0, "epoch": 5.122114668652271, "grad_norm": 0.004279647454133209, "kl": 0.2978515625, "learning_rate": 4.88649240206538e-07, "loss": 0.0002981731086038053, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3437, "train_speed(iter/s)": 0.027607 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 184.5, "completions/mean_length": 87.10416984558105, "completions/min_length": 41.0, "epoch": 5.123603871928518, "grad_norm": 0.004067165696362045, "kl": 0.3134765625, "learning_rate": 4.884128079540829e-07, "loss": 0.0003133645514026284, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3438, "train_speed(iter/s)": 0.027608 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 244.0, "completions/mean_length": 92.20833778381348, "completions/min_length": 28.0, "epoch": 5.125093075204766, "grad_norm": 2.0521599133298354, "kl": 0.308349609375, "learning_rate": 4.881763782938979e-07, "loss": -0.01630193367600441, "memory(GiB)": 112.53, "reward": 1.697916716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.697916679084301, "rewards/CineAccuracyORM/std": 0.34448449313640594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3439, "train_speed(iter/s)": 0.027606 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 246.25, "completions/mean_length": 107.17708587646484, "completions/min_length": 50.25, "epoch": 5.1265822784810124, "grad_norm": 0.004180974514920891, "kl": 0.260986328125, "learning_rate": 4.879399512788761e-07, "loss": 0.0002610931114759296, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3440, "train_speed(iter/s)": 0.027603 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.0, "completions/mean_length": 90.30208587646484, "completions/min_length": 32.75, "epoch": 5.12807148175726, "grad_norm": 0.003721160706349883, "kl": 0.28759765625, "learning_rate": 4.877035269619111e-07, "loss": 0.0002877216611523181, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3441, "train_speed(iter/s)": 0.027602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.5, "completions/mean_length": 89.75000190734863, "completions/min_length": 43.75, "epoch": 5.129560685033507, "grad_norm": 1.7646138711065884, "kl": 0.322265625, "learning_rate": 4.87467105395895e-07, "loss": -0.0037791524082422256, "memory(GiB)": 112.53, "reward": 1.822916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3442, "train_speed(iter/s)": 0.027602 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.5, "completions/mean_length": 101.21875190734863, "completions/min_length": 33.5, "epoch": 5.131049888309755, "grad_norm": 0.005161590644916297, "kl": 0.28466796875, "learning_rate": 4.8723068663372e-07, "loss": 0.0002843968104571104, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3443, "train_speed(iter/s)": 0.027598 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.0, "completions/mean_length": 95.30208587646484, "completions/min_length": 40.5, "epoch": 5.132539091586001, "grad_norm": 1.1452429929678303, "kl": 0.29833984375, "learning_rate": 4.869942707282773e-07, "loss": 7.130667654564604e-05, "memory(GiB)": 112.53, "reward": 1.6979167461395264, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6979166865348816, "rewards/CineAccuracyORM/std": 0.464870348572731, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3444, "train_speed(iter/s)": 0.027599 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.5, "completions/mean_length": 98.48958587646484, "completions/min_length": 40.5, "epoch": 5.134028294862249, "grad_norm": 1.4452612186189582, "kl": 0.2861328125, "learning_rate": 4.867578577324574e-07, "loss": -0.02929503098130226, "memory(GiB)": 112.53, "reward": 1.885416716337204, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.8854166865348816, "rewards/CineAccuracyORM/std": 0.24199935421347618, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3445, "train_speed(iter/s)": 0.0276 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 220.25, "completions/mean_length": 100.43750381469727, "completions/min_length": 47.75, "epoch": 5.135517498138496, "grad_norm": 0.00456946164781328, "kl": 0.28076171875, "learning_rate": 4.865214476991505e-07, "loss": 0.00028082786593586206, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3446, "train_speed(iter/s)": 0.027594 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 249.5, "completions/mean_length": 100.64583778381348, "completions/min_length": 44.75, "epoch": 5.137006701414743, "grad_norm": 0.004159422769896854, "kl": 0.29443359375, "learning_rate": 4.862850406812457e-07, "loss": 0.00029479365912266076, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3447, "train_speed(iter/s)": 0.027592 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.75, "completions/mean_length": 88.32291984558105, "completions/min_length": 41.75, "epoch": 5.13849590469099, "grad_norm": 0.005181159245395117, "kl": 0.30419921875, "learning_rate": 4.860486367316316e-07, "loss": 0.00030444259755313396, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3448, "train_speed(iter/s)": 0.027593 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 251.0, "completions/mean_length": 100.08333587646484, "completions/min_length": 31.25, "epoch": 5.139985107967237, "grad_norm": 1.8284697955975733, "kl": 0.28662109375, "learning_rate": 4.858122359031964e-07, "loss": 0.004199827555567026, "memory(GiB)": 112.53, "reward": 1.7187500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500149011612, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3449, "train_speed(iter/s)": 0.027591 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.25, "completions/mean_length": 97.21875381469727, "completions/min_length": 39.75, "epoch": 5.141474311243485, "grad_norm": 1.0393139526698663, "kl": 0.30419921875, "learning_rate": 4.85575838248827e-07, "loss": -0.0067437151446938515, "memory(GiB)": 112.53, "reward": 1.7604166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.760416679084301, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3450, "train_speed(iter/s)": 0.027592 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.75, "completions/mean_length": 95.57291793823242, "completions/min_length": 44.5, "epoch": 5.142963514519732, "grad_norm": 0.004595654983857709, "kl": 0.29248046875, "learning_rate": 4.853394438214105e-07, "loss": 0.00029286000062711537, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3451, "train_speed(iter/s)": 0.027592 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 214.25, "completions/mean_length": 92.97916984558105, "completions/min_length": 32.0, "epoch": 5.144452717795979, "grad_norm": 0.004939507151680022, "kl": 0.31787109375, "learning_rate": 4.85103052673832e-07, "loss": 0.00031797989504411817, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3452, "train_speed(iter/s)": 0.027593 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 194.0, "completions/mean_length": 93.12500190734863, "completions/min_length": 40.25, "epoch": 5.145941921072226, "grad_norm": 1.163789798819547, "kl": 0.3037109375, "learning_rate": 4.848666648589771e-07, "loss": -0.0042214952409267426, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.822916679084301, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3453, "train_speed(iter/s)": 0.027593 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 196.5, "completions/mean_length": 94.08333396911621, "completions/min_length": 46.25, "epoch": 5.147431124348474, "grad_norm": 1.5898398439934731, "kl": 0.29541015625, "learning_rate": 4.846302804297301e-07, "loss": -0.0028986481484025717, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3454, "train_speed(iter/s)": 0.027594 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.75, "completions/mean_length": 94.92708587646484, "completions/min_length": 42.5, "epoch": 5.148920327624721, "grad_norm": 0.004200886257422964, "kl": 0.29638671875, "learning_rate": 4.843938994389743e-07, "loss": 0.00029650406213477254, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3455, "train_speed(iter/s)": 0.027592 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 232.75, "completions/mean_length": 104.94791984558105, "completions/min_length": 46.25, "epoch": 5.150409530900968, "grad_norm": 0.004524618530758772, "kl": 0.294921875, "learning_rate": 4.841575219395927e-07, "loss": 0.0002946058812085539, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3456, "train_speed(iter/s)": 0.027589 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.25, "completions/mean_length": 94.90625190734863, "completions/min_length": 46.0, "epoch": 5.151898734177215, "grad_norm": 0.9912554657478176, "kl": 0.31689453125, "learning_rate": 4.839211479844673e-07, "loss": 0.007958788424730301, "memory(GiB)": 112.53, "reward": 1.6458333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6458333507180214, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3457, "train_speed(iter/s)": 0.027587 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 208.0, "completions/mean_length": 98.16666984558105, "completions/min_length": 43.75, "epoch": 5.153387937453463, "grad_norm": 0.7169208120554137, "kl": 0.27490234375, "learning_rate": 4.836847776264794e-07, "loss": 0.006516417022794485, "memory(GiB)": 112.53, "reward": 1.9895833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9895833432674408, "rewards/CineAccuracyORM/std": 0.05103103443980217, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3458, "train_speed(iter/s)": 0.027586 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.75, "completions/mean_length": 99.63541793823242, "completions/min_length": 47.0, "epoch": 5.15487714072971, "grad_norm": 0.005728175065345606, "kl": 0.287109375, "learning_rate": 4.834484109185095e-07, "loss": 0.00028685457073152065, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3459, "train_speed(iter/s)": 0.027584 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.25, "completions/mean_length": 101.87500190734863, "completions/min_length": 45.75, "epoch": 5.156366344005956, "grad_norm": 1.7101820121124969, "kl": 0.28857421875, "learning_rate": 4.832120479134369e-07, "loss": 0.008217383176088333, "memory(GiB)": 112.53, "reward": 1.6458333432674408, "reward_std": 0.0725918859243393, "rewards/CineAccuracyORM/mean": 0.6458333358168602, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3460, "train_speed(iter/s)": 0.027584 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 267.5, "completions/mean_length": 96.76041984558105, "completions/min_length": 40.25, "epoch": 5.157855547282204, "grad_norm": 0.004110376775353032, "kl": 0.28955078125, "learning_rate": 4.829756886641407e-07, "loss": 0.00028945106896571815, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3461, "train_speed(iter/s)": 0.027587 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 229.0, "completions/mean_length": 93.11458587646484, "completions/min_length": 35.25, "epoch": 5.159344750558451, "grad_norm": 1.3204032243149226, "kl": 0.3046875, "learning_rate": 4.827393332234985e-07, "loss": 0.002382636768743396, "memory(GiB)": 112.53, "reward": 1.8645833730697632, "reward_std": 0.07400631904602051, "rewards/CineAccuracyORM/mean": 0.8645833432674408, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3462, "train_speed(iter/s)": 0.027585 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 243.75, "completions/mean_length": 103.94791984558105, "completions/min_length": 45.25, "epoch": 5.160833953834699, "grad_norm": 1.5805454935580938, "kl": 0.294921875, "learning_rate": 4.825029816443879e-07, "loss": 0.017493296414613724, "memory(GiB)": 112.53, "reward": 1.6145834028720856, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6145833507180214, "rewards/CineAccuracyORM/std": 0.464870348572731, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3463, "train_speed(iter/s)": 0.027584 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 194.5, "completions/mean_length": 89.56250190734863, "completions/min_length": 36.75, "epoch": 5.162323157110945, "grad_norm": 0.0041351649128856345, "kl": 0.31982421875, "learning_rate": 4.822666339796849e-07, "loss": 0.0003197619807906449, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3464, "train_speed(iter/s)": 0.027583 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.75, "completions/mean_length": 98.67708587646484, "completions/min_length": 44.5, "epoch": 5.163812360387193, "grad_norm": 1.119321925213919, "kl": 0.2919921875, "learning_rate": 4.820302902822647e-07, "loss": 0.008216704241931438, "memory(GiB)": 112.53, "reward": 1.7604166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3465, "train_speed(iter/s)": 0.027577 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 208.5, "completions/mean_length": 90.43750381469727, "completions/min_length": 39.0, "epoch": 5.16530156366344, "grad_norm": 0.00421663957369237, "kl": 0.31103515625, "learning_rate": 4.81793950605002e-07, "loss": 0.00031104066874831915, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3466, "train_speed(iter/s)": 0.02758 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 237.5, "completions/mean_length": 90.59375190734863, "completions/min_length": 41.75, "epoch": 5.1667907669396875, "grad_norm": 0.0038606847065775305, "kl": 0.28857421875, "learning_rate": 4.815576150007701e-07, "loss": 0.00028838327853009105, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3467, "train_speed(iter/s)": 0.02758 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.5, "completions/mean_length": 90.67708587646484, "completions/min_length": 45.25, "epoch": 5.168279970215934, "grad_norm": 0.0036449462322748006, "kl": 0.30078125, "learning_rate": 4.813212835224418e-07, "loss": 0.00030088803032413125, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3468, "train_speed(iter/s)": 0.027576 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.5, "completions/mean_length": 89.91666984558105, "completions/min_length": 39.5, "epoch": 5.169769173492182, "grad_norm": 0.0038980516191064202, "kl": 0.32373046875, "learning_rate": 4.81084956222889e-07, "loss": 0.00032413541339337826, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3469, "train_speed(iter/s)": 0.027574 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 229.0, "completions/mean_length": 98.79166984558105, "completions/min_length": 38.5, "epoch": 5.171258376768429, "grad_norm": 1.0525528948488079, "kl": 0.2841796875, "learning_rate": 4.808486331549823e-07, "loss": 0.00933044869452715, "memory(GiB)": 112.53, "reward": 1.8645833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8645833432674408, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3470, "train_speed(iter/s)": 0.02757 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.5, "completions/mean_length": 95.60416984558105, "completions/min_length": 44.75, "epoch": 5.1727475800446765, "grad_norm": 0.0046054774071372755, "kl": 0.27978515625, "learning_rate": 4.806123143715915e-07, "loss": 0.00027980614686384797, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3471, "train_speed(iter/s)": 0.027573 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.25, "completions/mean_length": 92.53125381469727, "completions/min_length": 48.5, "epoch": 5.174236783320923, "grad_norm": 0.003901420306620293, "kl": 0.2900390625, "learning_rate": 4.803759999255859e-07, "loss": 0.0002903237473219633, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3472, "train_speed(iter/s)": 0.027576 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.5, "completions/mean_length": 96.42708587646484, "completions/min_length": 44.0, "epoch": 5.17572598659717, "grad_norm": 0.004133085634242103, "kl": 0.274658203125, "learning_rate": 4.801396898698329e-07, "loss": 0.00027426553424447775, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3473, "train_speed(iter/s)": 0.027578 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.25, "completions/mean_length": 97.38541793823242, "completions/min_length": 36.5, "epoch": 5.177215189873418, "grad_norm": 1.336292421513614, "kl": 0.30859375, "learning_rate": 4.799033842571998e-07, "loss": -0.001093971892260015, "memory(GiB)": 112.53, "reward": 1.822916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3474, "train_speed(iter/s)": 0.027577 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.0, "completions/mean_length": 84.11458587646484, "completions/min_length": 40.75, "epoch": 5.178704393149665, "grad_norm": 0.003800342705624699, "kl": 0.33056640625, "learning_rate": 4.796670831405523e-07, "loss": 0.0003307221340946853, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3475, "train_speed(iter/s)": 0.027574 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.75, "completions/mean_length": 94.14583587646484, "completions/min_length": 36.0, "epoch": 5.180193596425912, "grad_norm": 0.004401169482659586, "kl": 0.28955078125, "learning_rate": 4.794307865727554e-07, "loss": 0.0002890304895117879, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3476, "train_speed(iter/s)": 0.027572 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.0, "completions/mean_length": 81.84375381469727, "completions/min_length": 38.0, "epoch": 5.181682799702159, "grad_norm": 0.004124881436580417, "kl": 0.3154296875, "learning_rate": 4.791944946066734e-07, "loss": 0.0003151576966047287, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3477, "train_speed(iter/s)": 0.027573 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 195.5, "completions/mean_length": 84.67708587646484, "completions/min_length": 32.5, "epoch": 5.183172002978407, "grad_norm": 1.3625812094458107, "kl": 0.314453125, "learning_rate": 4.789582072951685e-07, "loss": 0.008953044191002846, "memory(GiB)": 112.53, "reward": 1.96875, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.96875, "rewards/CineAccuracyORM/std": 0.08445799350738525, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3478, "train_speed(iter/s)": 0.027572 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.25, "completions/mean_length": 94.90625190734863, "completions/min_length": 43.0, "epoch": 5.1846612062546535, "grad_norm": 0.004118418764077728, "kl": 0.2841796875, "learning_rate": 4.787219246911034e-07, "loss": 0.00028395489789545536, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3479, "train_speed(iter/s)": 0.027568 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.25, "completions/mean_length": 100.41666793823242, "completions/min_length": 52.5, "epoch": 5.186150409530901, "grad_norm": 0.0042109647189021784, "kl": 0.271240234375, "learning_rate": 4.784856468473381e-07, "loss": 0.0002712989808060229, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3480, "train_speed(iter/s)": 0.027568 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 208.5, "completions/mean_length": 88.39583587646484, "completions/min_length": 38.0, "epoch": 5.187639612807148, "grad_norm": 1.4286091398985488, "kl": 0.30615234375, "learning_rate": 4.782493738167327e-07, "loss": -0.005264680366963148, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3481, "train_speed(iter/s)": 0.027566 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 225.75, "completions/mean_length": 90.19791793823242, "completions/min_length": 46.25, "epoch": 5.189128816083396, "grad_norm": 3.041195303017868, "kl": 0.3037109375, "learning_rate": 4.780131056521459e-07, "loss": -0.008031648583710194, "memory(GiB)": 112.53, "reward": 1.6041666865348816, "reward_std": 0.08625819534063339, "rewards/CineAccuracyORM/mean": 0.6041666716337204, "rewards/CineAccuracyORM/std": 0.3085566312074661, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3482, "train_speed(iter/s)": 0.027564 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.25, "completions/mean_length": 82.19791984558105, "completions/min_length": 38.25, "epoch": 5.1906180193596425, "grad_norm": 0.0040979386675841795, "kl": 0.30517578125, "learning_rate": 4.777768424064352e-07, "loss": 0.0003050474915653467, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3483, "train_speed(iter/s)": 0.027563 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 191.25, "completions/mean_length": 81.62500381469727, "completions/min_length": 33.5, "epoch": 5.19210722263589, "grad_norm": 0.0038775938768250843, "kl": 0.31396484375, "learning_rate": 4.775405841324571e-07, "loss": 0.00031377244158647954, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3484, "train_speed(iter/s)": 0.027561 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.75, "completions/mean_length": 89.30208396911621, "completions/min_length": 32.5, "epoch": 5.193596425912137, "grad_norm": 0.003986841828206401, "kl": 0.31884765625, "learning_rate": 4.773043308830669e-07, "loss": 0.0003194223972968757, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3485, "train_speed(iter/s)": 0.027562 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.0, "completions/mean_length": 90.36458778381348, "completions/min_length": 44.75, "epoch": 5.195085629188384, "grad_norm": 2.6237003351007093, "kl": 0.37255859375, "learning_rate": 4.770680827111189e-07, "loss": -0.0028980495408177376, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.08311938121914864, "rewards/CineAccuracyORM/mean": 0.770833358168602, "rewards/CineAccuracyORM/std": 0.34146176278591156, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3486, "train_speed(iter/s)": 0.027564 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.25, "completions/mean_length": 90.02083587646484, "completions/min_length": 39.75, "epoch": 5.1965748324646315, "grad_norm": 1.7494919241186102, "kl": 0.31103515625, "learning_rate": 4.768318396694662e-07, "loss": -0.006186432205140591, "memory(GiB)": 112.53, "reward": 1.8854166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.12724433839321136, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3487, "train_speed(iter/s)": 0.027563 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.0, "completions/mean_length": 82.52083587646484, "completions/min_length": 44.0, "epoch": 5.198064035740878, "grad_norm": 0.003887695927146302, "kl": 0.314453125, "learning_rate": 4.7659560181096067e-07, "loss": 0.0003153703291900456, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3488, "train_speed(iter/s)": 0.027564 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.25, "completions/mean_length": 86.79166984558105, "completions/min_length": 43.75, "epoch": 5.199553239017126, "grad_norm": 0.0039654299430491955, "kl": 0.31640625, "learning_rate": 4.7635936918845334e-07, "loss": 0.00031575921457260847, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3489, "train_speed(iter/s)": 0.027567 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.0, "completions/mean_length": 89.04166984558105, "completions/min_length": 36.75, "epoch": 5.201042442293373, "grad_norm": 0.0038990220726703387, "kl": 0.31298828125, "learning_rate": 4.761231418547934e-07, "loss": 0.0003127785457763821, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3490, "train_speed(iter/s)": 0.027565 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.5, "completions/mean_length": 93.96875381469727, "completions/min_length": 38.0, "epoch": 5.2025316455696204, "grad_norm": 0.654641742512971, "kl": 0.279296875, "learning_rate": 4.7588691986282953e-07, "loss": -0.018637042492628098, "memory(GiB)": 112.53, "reward": 1.8125000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3491, "train_speed(iter/s)": 0.027567 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 236.5, "completions/mean_length": 88.78125381469727, "completions/min_length": 30.25, "epoch": 5.204020848845867, "grad_norm": 0.004209083758262472, "kl": 0.30029296875, "learning_rate": 4.7565070326540915e-07, "loss": 0.00030098832212388515, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3492, "train_speed(iter/s)": 0.027569 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.0, "completions/mean_length": 85.41666984558105, "completions/min_length": 34.75, "epoch": 5.205510052122115, "grad_norm": 0.7329504113130646, "kl": 0.29931640625, "learning_rate": 4.75414492115378e-07, "loss": 0.009301194921135902, "memory(GiB)": 112.53, "reward": 1.6562500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500149011612, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3493, "train_speed(iter/s)": 0.027572 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.5, "completions/mean_length": 91.36458587646484, "completions/min_length": 33.5, "epoch": 5.206999255398362, "grad_norm": 0.0038269286332308387, "kl": 0.30712890625, "learning_rate": 4.7517828646558106e-07, "loss": 0.0003072537947446108, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3494, "train_speed(iter/s)": 0.027571 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.5, "completions/mean_length": 91.43750381469727, "completions/min_length": 36.25, "epoch": 5.208488458674609, "grad_norm": 0.020034143853484974, "kl": 0.318359375, "learning_rate": 4.749420863688617e-07, "loss": 0.0003183075459674001, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3495, "train_speed(iter/s)": 0.027567 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.5, "completions/mean_length": 84.55208587646484, "completions/min_length": 40.5, "epoch": 5.209977661950856, "grad_norm": 0.003536282860096495, "kl": 0.3095703125, "learning_rate": 4.7470589187806253e-07, "loss": 0.00030926510225981474, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3496, "train_speed(iter/s)": 0.027564 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.25, "completions/mean_length": 85.53125381469727, "completions/min_length": 44.5, "epoch": 5.211466865227104, "grad_norm": 1.8271354312344148, "kl": 0.3154296875, "learning_rate": 4.744697030460247e-07, "loss": -0.006367639638483524, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3497, "train_speed(iter/s)": 0.027561 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.0, "completions/mean_length": 86.11458396911621, "completions/min_length": 39.25, "epoch": 5.212956068503351, "grad_norm": 0.0036075112279431187, "kl": 0.29638671875, "learning_rate": 4.742335199255877e-07, "loss": 0.00029670732328668237, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3498, "train_speed(iter/s)": 0.027562 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 264.25, "completions/mean_length": 93.17708778381348, "completions/min_length": 39.5, "epoch": 5.2144452717795975, "grad_norm": 0.0036582497426155425, "kl": 0.3056640625, "learning_rate": 4.7399734256959035e-07, "loss": 0.00030542476451955736, "memory(GiB)": 112.53, "reward": 1.5000000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000074505806, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3499, "train_speed(iter/s)": 0.02756 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.75, "completions/mean_length": 84.91666793823242, "completions/min_length": 37.5, "epoch": 5.215934475055845, "grad_norm": 1.5133029381198304, "kl": 0.3291015625, "learning_rate": 4.7376117103086973e-07, "loss": 0.007374337874352932, "memory(GiB)": 112.53, "reward": 1.7083333730697632, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7083333432674408, "rewards/CineAccuracyORM/std": 0.36846019327640533, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3500, "train_speed(iter/s)": 0.027563 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.0, "completions/mean_length": 85.17708587646484, "completions/min_length": 41.5, "epoch": 5.217423678332092, "grad_norm": 0.0031463800282029116, "kl": 0.27880859375, "learning_rate": 4.7352500536226177e-07, "loss": 0.0002792162704281509, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3501, "train_speed(iter/s)": 0.027555 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 246.5, "completions/mean_length": 98.65625381469727, "completions/min_length": 38.0, "epoch": 5.21891288160834, "grad_norm": 0.003755889533904376, "kl": 0.287109375, "learning_rate": 4.7328884561660146e-07, "loss": 0.0002869843738153577, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3502, "train_speed(iter/s)": 0.027552 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.75, "completions/mean_length": 95.57291793823242, "completions/min_length": 42.0, "epoch": 5.2204020848845865, "grad_norm": 2.7107655380594866, "kl": 0.3056640625, "learning_rate": 4.7305269184672165e-07, "loss": -0.016923435032367706, "memory(GiB)": 112.53, "reward": 1.6979166865348816, "reward_std": 0.12028077617287636, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.3546011596918106, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3503, "train_speed(iter/s)": 0.027546 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.0, "completions/mean_length": 82.50000190734863, "completions/min_length": 38.5, "epoch": 5.221891288160834, "grad_norm": 0.003104728549405423, "kl": 0.29736328125, "learning_rate": 4.728165441054548e-07, "loss": 0.0002972165239043534, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3504, "train_speed(iter/s)": 0.027544 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.75, "completions/mean_length": 84.01041984558105, "completions/min_length": 36.75, "epoch": 5.223380491437081, "grad_norm": 1.5930373337931316, "kl": 0.30810546875, "learning_rate": 4.7258040244563096e-07, "loss": -0.007056513335555792, "memory(GiB)": 112.53, "reward": 1.9375, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.11058146506547928, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3505, "train_speed(iter/s)": 0.027547 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.75, "completions/mean_length": 85.18750381469727, "completions/min_length": 42.0, "epoch": 5.224869694713329, "grad_norm": 0.0033941640114841538, "kl": 0.287109375, "learning_rate": 4.7234426692007977e-07, "loss": 0.0002875146456062794, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3506, "train_speed(iter/s)": 0.02755 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 236.25, "completions/mean_length": 94.67708587646484, "completions/min_length": 38.25, "epoch": 5.226358897989575, "grad_norm": 0.003433854225650625, "kl": 0.29443359375, "learning_rate": 4.721081375816291e-07, "loss": 0.0002948419423773885, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3507, "train_speed(iter/s)": 0.027548 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 293.25, "completions/mean_length": 96.44791984558105, "completions/min_length": 33.25, "epoch": 5.227848101265823, "grad_norm": 0.0038965410020556724, "kl": 0.302734375, "learning_rate": 4.718720144831054e-07, "loss": 0.0003026674676220864, "memory(GiB)": 112.53, "reward": 1.5000000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3508, "train_speed(iter/s)": 0.027547 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.25, "completions/mean_length": 91.36458587646484, "completions/min_length": 44.0, "epoch": 5.22933730454207, "grad_norm": 0.8546745069799644, "kl": 0.292724609375, "learning_rate": 4.716358976773341e-07, "loss": 0.013316317461431026, "memory(GiB)": 112.53, "reward": 1.697916716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.697916679084301, "rewards/CineAccuracyORM/std": 0.34448449313640594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3509, "train_speed(iter/s)": 0.027547 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.75, "completions/mean_length": 80.86458587646484, "completions/min_length": 37.25, "epoch": 5.230826507818318, "grad_norm": 0.003336140411643792, "kl": 0.3046875, "learning_rate": 4.713997872171383e-07, "loss": 0.00030446387245319784, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3510, "train_speed(iter/s)": 0.027546 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 204.75, "completions/mean_length": 82.69791984558105, "completions/min_length": 38.0, "epoch": 5.232315711094564, "grad_norm": 0.0033519997542145854, "kl": 0.32177734375, "learning_rate": 4.7116368315534067e-07, "loss": 0.00032155733788385987, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3511, "train_speed(iter/s)": 0.027547 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.25, "completions/mean_length": 85.03125190734863, "completions/min_length": 35.25, "epoch": 5.233804914370811, "grad_norm": 0.003949246369380212, "kl": 0.3232421875, "learning_rate": 4.7092758554476206e-07, "loss": 0.00032354958239011467, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3512, "train_speed(iter/s)": 0.027549 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.5, "completions/mean_length": 87.81250190734863, "completions/min_length": 40.5, "epoch": 5.235294117647059, "grad_norm": 0.003948027118592221, "kl": 0.314453125, "learning_rate": 4.706914944382218e-07, "loss": 0.0003142167115584016, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3513, "train_speed(iter/s)": 0.027552 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.5, "completions/mean_length": 89.83333587646484, "completions/min_length": 41.5, "epoch": 5.236783320923306, "grad_norm": 0.0039673745202071726, "kl": 0.269287109375, "learning_rate": 4.704554098885381e-07, "loss": 0.0002692977141123265, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3514, "train_speed(iter/s)": 0.02755 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.25, "completions/mean_length": 92.05208587646484, "completions/min_length": 44.25, "epoch": 5.238272524199553, "grad_norm": 0.005064413084496872, "kl": 0.30029296875, "learning_rate": 4.70219331948527e-07, "loss": 0.00030029454501345754, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3515, "train_speed(iter/s)": 0.027551 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 181.25, "completions/mean_length": 96.88541984558105, "completions/min_length": 46.75, "epoch": 5.2397617274758, "grad_norm": 0.004103941779302189, "kl": 0.27734375, "learning_rate": 4.699832606710038e-07, "loss": 0.00027740103541873395, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3516, "train_speed(iter/s)": 0.02755 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.0, "completions/mean_length": 89.66666984558105, "completions/min_length": 38.5, "epoch": 5.241250930752048, "grad_norm": 0.0035166278621315264, "kl": 0.291015625, "learning_rate": 4.6974719610878214e-07, "loss": 0.0002913748612627387, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3517, "train_speed(iter/s)": 0.027548 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 271.25, "completions/mean_length": 101.20833587646484, "completions/min_length": 48.0, "epoch": 5.242740134028295, "grad_norm": 1.9044727022094772, "kl": 0.271484375, "learning_rate": 4.695111383146737e-07, "loss": 0.0029489293228834867, "memory(GiB)": 112.53, "reward": 1.760416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166865348816, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3518, "train_speed(iter/s)": 0.027544 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.75, "completions/mean_length": 86.72916793823242, "completions/min_length": 44.0, "epoch": 5.244229337304542, "grad_norm": 2.3275438509761113, "kl": 0.31884765625, "learning_rate": 4.6927508734148927e-07, "loss": -0.009915941394865513, "memory(GiB)": 112.53, "reward": 1.6145833730697632, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.6145833507180214, "rewards/CineAccuracyORM/std": 0.46742958575487137, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3519, "train_speed(iter/s)": 0.027545 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 234.5, "completions/mean_length": 90.76041984558105, "completions/min_length": 36.75, "epoch": 5.245718540580789, "grad_norm": 0.0037138273709926076, "kl": 0.32470703125, "learning_rate": 4.6903904324203765e-07, "loss": 0.00032492188620381057, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3520, "train_speed(iter/s)": 0.027542 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 191.0, "completions/mean_length": 89.13541984558105, "completions/min_length": 48.25, "epoch": 5.247207743857037, "grad_norm": 0.003844866899897393, "kl": 0.28857421875, "learning_rate": 4.688030060691263e-07, "loss": 0.0002890216710511595, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3521, "train_speed(iter/s)": 0.027543 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.25, "completions/mean_length": 87.65625190734863, "completions/min_length": 40.0, "epoch": 5.248696947133284, "grad_norm": 0.0039010613103034326, "kl": 0.29736328125, "learning_rate": 4.685669758755615e-07, "loss": 0.00029718922451138496, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3522, "train_speed(iter/s)": 0.027546 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.25, "completions/mean_length": 90.04166984558105, "completions/min_length": 31.5, "epoch": 5.250186150409531, "grad_norm": 0.003913960065391152, "kl": 0.310546875, "learning_rate": 4.683309527141469e-07, "loss": 0.00031044133356772363, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3523, "train_speed(iter/s)": 0.027544 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.5, "completions/mean_length": 86.97916793823242, "completions/min_length": 37.0, "epoch": 5.251675353685778, "grad_norm": 0.0036741800195538715, "kl": 0.302734375, "learning_rate": 4.6809493663768575e-07, "loss": 0.000302690954413265, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3524, "train_speed(iter/s)": 0.027545 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.0, "completions/mean_length": 80.90625190734863, "completions/min_length": 37.25, "epoch": 5.253164556962025, "grad_norm": 0.5705945166420202, "kl": 0.3125, "learning_rate": 4.678589276989792e-07, "loss": -0.001564201433211565, "memory(GiB)": 112.53, "reward": 1.7812500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7812500149011612, "rewards/CineAccuracyORM/std": 0.34448449313640594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3525, "train_speed(iter/s)": 0.027548 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.75, "completions/mean_length": 87.67708396911621, "completions/min_length": 46.75, "epoch": 5.254653760238273, "grad_norm": 0.0040573018501886176, "kl": 0.310546875, "learning_rate": 4.6762292595082654e-07, "loss": 0.00031013289117254317, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3526, "train_speed(iter/s)": 0.027549 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.25, "completions/mean_length": 83.35416984558105, "completions/min_length": 36.75, "epoch": 5.256142963514519, "grad_norm": 0.005048369143550325, "kl": 0.32470703125, "learning_rate": 4.6738693144602615e-07, "loss": 0.00032439950155094266, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3527, "train_speed(iter/s)": 0.027549 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.25, "completions/mean_length": 88.29166793823242, "completions/min_length": 40.5, "epoch": 5.257632166790767, "grad_norm": 0.00394115861435798, "kl": 0.291015625, "learning_rate": 4.671509442373739e-07, "loss": 0.0002908852184191346, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3528, "train_speed(iter/s)": 0.027549 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 279.0, "completions/mean_length": 100.81250381469727, "completions/min_length": 44.5, "epoch": 5.259121370067014, "grad_norm": 1.9750644274877802, "kl": 0.28759765625, "learning_rate": 4.669149643776649e-07, "loss": -0.022460460662841797, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3529, "train_speed(iter/s)": 0.027547 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.75, "completions/mean_length": 92.41666793823242, "completions/min_length": 39.25, "epoch": 5.2606105733432615, "grad_norm": 0.0035599265054295846, "kl": 0.31689453125, "learning_rate": 4.6667899191969216e-07, "loss": 0.00031679304083809257, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3530, "train_speed(iter/s)": 0.027546 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.25, "completions/mean_length": 93.59375190734863, "completions/min_length": 43.25, "epoch": 5.262099776619508, "grad_norm": 1.6688752602772015, "kl": 0.26708984375, "learning_rate": 4.664430269162468e-07, "loss": 0.015385176055133343, "memory(GiB)": 112.53, "reward": 1.7187500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500074505806, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3531, "train_speed(iter/s)": 0.027546 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 232.0, "completions/mean_length": 95.31250190734863, "completions/min_length": 40.0, "epoch": 5.263588979895756, "grad_norm": 0.6798542050316252, "kl": 0.283203125, "learning_rate": 4.66207069420119e-07, "loss": -0.001147639937698841, "memory(GiB)": 112.53, "reward": 1.7395834028720856, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.739583358168602, "rewards/CineAccuracyORM/std": 0.4121886007487774, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3532, "train_speed(iter/s)": 0.027544 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.25, "completions/mean_length": 80.64583587646484, "completions/min_length": 29.5, "epoch": 5.265078183172003, "grad_norm": 0.003455804463410809, "kl": 0.3154296875, "learning_rate": 4.659711194840964e-07, "loss": 0.00031506578670814633, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3533, "train_speed(iter/s)": 0.027544 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 250.5, "completions/mean_length": 91.46875381469727, "completions/min_length": 35.75, "epoch": 5.2665673864482505, "grad_norm": 1.4569616677017843, "kl": 0.38134765625, "learning_rate": 4.657351771609656e-07, "loss": -0.0019556176848709583, "memory(GiB)": 112.53, "reward": 1.6145834028720856, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6145833507180214, "rewards/CineAccuracyORM/std": 0.4884019047021866, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3534, "train_speed(iter/s)": 0.027542 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.25, "completions/mean_length": 91.26041984558105, "completions/min_length": 40.75, "epoch": 5.268056589724497, "grad_norm": 0.0036488718025896945, "kl": 0.2998046875, "learning_rate": 4.654992425035115e-07, "loss": 0.00029978028032928705, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3535, "train_speed(iter/s)": 0.027543 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 206.75, "completions/mean_length": 92.02083587646484, "completions/min_length": 42.25, "epoch": 5.269545793000745, "grad_norm": 0.003978289246207614, "kl": 0.3056640625, "learning_rate": 4.6526331556451663e-07, "loss": 0.0003051243838854134, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3536, "train_speed(iter/s)": 0.027545 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.25, "completions/mean_length": 93.86458778381348, "completions/min_length": 46.5, "epoch": 5.271034996276992, "grad_norm": 0.0038429247746910736, "kl": 0.28857421875, "learning_rate": 4.650273963967625e-07, "loss": 0.00028839727747254074, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3537, "train_speed(iter/s)": 0.027546 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.75, "completions/mean_length": 96.21875190734863, "completions/min_length": 38.25, "epoch": 5.272524199553239, "grad_norm": 1.685314885335932, "kl": 0.279296875, "learning_rate": 4.647914850530284e-07, "loss": 0.0009338540839962661, "memory(GiB)": 112.53, "reward": 1.6770833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833358168602, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3538, "train_speed(iter/s)": 0.027546 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.5, "completions/mean_length": 89.83333587646484, "completions/min_length": 38.5, "epoch": 5.274013402829486, "grad_norm": 0.0035935237753081323, "kl": 0.3037109375, "learning_rate": 4.645555815860922e-07, "loss": 0.0003036491107195616, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3539, "train_speed(iter/s)": 0.027547 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 234.5, "completions/mean_length": 97.42708587646484, "completions/min_length": 44.25, "epoch": 5.275502606105733, "grad_norm": 0.0036261469855143297, "kl": 0.3017578125, "learning_rate": 4.643196860487301e-07, "loss": 0.00030152438557706773, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3540, "train_speed(iter/s)": 0.027545 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.0, "completions/mean_length": 87.26041793823242, "completions/min_length": 43.75, "epoch": 5.276991809381981, "grad_norm": 0.005141001851099412, "kl": 0.30126953125, "learning_rate": 4.640837984937159e-07, "loss": 0.0003012167289853096, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3541, "train_speed(iter/s)": 0.027547 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 199.0, "completions/mean_length": 83.65625190734863, "completions/min_length": 41.25, "epoch": 5.2784810126582276, "grad_norm": 0.0031021186418528253, "kl": 0.29052734375, "learning_rate": 4.638479189738224e-07, "loss": 0.0002906103036366403, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3542, "train_speed(iter/s)": 0.02755 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.0, "completions/mean_length": 85.25000190734863, "completions/min_length": 35.25, "epoch": 5.279970215934475, "grad_norm": 2.8179926082594227, "kl": 0.33251953125, "learning_rate": 4.636120475418197e-07, "loss": 0.0005465762224048376, "memory(GiB)": 112.53, "reward": 1.6250000298023224, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.6250000074505806, "rewards/CineAccuracyORM/std": 0.36846019327640533, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3543, "train_speed(iter/s)": 0.027549 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.75, "completions/mean_length": 88.06250190734863, "completions/min_length": 36.5, "epoch": 5.281459419210722, "grad_norm": 0.008996352610286227, "kl": 0.3173828125, "learning_rate": 4.6337618425047705e-07, "loss": 0.0003175007295794785, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3544, "train_speed(iter/s)": 0.027547 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 191.5, "completions/mean_length": 89.42708587646484, "completions/min_length": 32.5, "epoch": 5.28294862248697, "grad_norm": 2.1306275841237516, "kl": 0.2978515625, "learning_rate": 4.631403291525614e-07, "loss": 0.016832783818244934, "memory(GiB)": 112.53, "reward": 1.8958333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3545, "train_speed(iter/s)": 0.02755 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.5, "completions/mean_length": 89.60416984558105, "completions/min_length": 35.0, "epoch": 5.2844378257632165, "grad_norm": 1.090597976250448, "kl": 0.275634765625, "learning_rate": 4.629044823008378e-07, "loss": -0.010630167089402676, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3546, "train_speed(iter/s)": 0.027549 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.75, "completions/mean_length": 92.43750381469727, "completions/min_length": 44.5, "epoch": 5.285927029039464, "grad_norm": 0.003661970841359641, "kl": 0.28271484375, "learning_rate": 4.6266864374806975e-07, "loss": 0.0002827478456310928, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3547, "train_speed(iter/s)": 0.027545 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 202.75, "completions/mean_length": 91.77083587646484, "completions/min_length": 40.75, "epoch": 5.287416232315711, "grad_norm": 0.003553392573618277, "kl": 0.2900390625, "learning_rate": 4.6243281354701836e-07, "loss": 0.0002898403618019074, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3548, "train_speed(iter/s)": 0.027543 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.75, "completions/mean_length": 89.31250190734863, "completions/min_length": 43.0, "epoch": 5.288905435591959, "grad_norm": 0.0038575727195886878, "kl": 0.30224609375, "learning_rate": 4.621969917504435e-07, "loss": 0.00030209458782337606, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3549, "train_speed(iter/s)": 0.027546 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.75, "completions/mean_length": 85.70833587646484, "completions/min_length": 40.25, "epoch": 5.2903946388682055, "grad_norm": 0.8045190013117636, "kl": 0.3408203125, "learning_rate": 4.619611784111028e-07, "loss": 0.021106921136379242, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3550, "train_speed(iter/s)": 0.027542 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.75, "completions/mean_length": 95.72916984558105, "completions/min_length": 47.5, "epoch": 5.291883842144452, "grad_norm": 2.2943308597877747, "kl": 0.296875, "learning_rate": 4.617253735817521e-07, "loss": -0.004191666375845671, "memory(GiB)": 112.53, "reward": 1.697916716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.697916679084301, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3551, "train_speed(iter/s)": 0.027539 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 201.0, "completions/mean_length": 90.69791793823242, "completions/min_length": 45.25, "epoch": 5.2933730454207, "grad_norm": 0.003591200091745116, "kl": 0.298828125, "learning_rate": 4.614895773151455e-07, "loss": 0.00029883472598157823, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3552, "train_speed(iter/s)": 0.027533 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 256.25, "completions/mean_length": 105.83333396911621, "completions/min_length": 49.5, "epoch": 5.294862248696947, "grad_norm": 0.7348080468034154, "kl": 0.277099609375, "learning_rate": 4.6125378966403456e-07, "loss": -0.021511031314730644, "memory(GiB)": 112.53, "reward": 1.9479166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.9479166716337204, "rewards/CineAccuracyORM/std": 0.1037127822637558, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3553, "train_speed(iter/s)": 0.027534 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 247.25, "completions/mean_length": 91.88541793823242, "completions/min_length": 43.0, "epoch": 5.2963514519731945, "grad_norm": 0.008014821145917067, "kl": 0.28955078125, "learning_rate": 4.6101801068116957e-07, "loss": 0.000289735704427585, "memory(GiB)": 112.53, "reward": 1.5, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3554, "train_speed(iter/s)": 0.027536 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.5, "completions/mean_length": 92.34375190734863, "completions/min_length": 42.5, "epoch": 5.297840655249441, "grad_norm": 0.003924721710459421, "kl": 0.31103515625, "learning_rate": 4.6078224041929894e-07, "loss": 0.0003109260287601501, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3555, "train_speed(iter/s)": 0.027536 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 237.25, "completions/mean_length": 92.70833587646484, "completions/min_length": 37.75, "epoch": 5.299329858525689, "grad_norm": 0.004107435804962701, "kl": 0.28662109375, "learning_rate": 4.605464789311683e-07, "loss": 0.00028651877073571086, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3556, "train_speed(iter/s)": 0.027534 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.25, "completions/mean_length": 96.79166793823242, "completions/min_length": 43.5, "epoch": 5.300819061801936, "grad_norm": 0.003963008893906148, "kl": 0.291015625, "learning_rate": 4.603107262695224e-07, "loss": 0.0002910431649070233, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3557, "train_speed(iter/s)": 0.027535 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 243.75, "completions/mean_length": 111.55208778381348, "completions/min_length": 52.0, "epoch": 5.302308265078183, "grad_norm": 0.6530291229470878, "kl": 0.25927734375, "learning_rate": 4.60074982487103e-07, "loss": 0.0011951717315241694, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833386108279, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3558, "train_speed(iter/s)": 0.02753 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.0, "completions/mean_length": 90.63541984558105, "completions/min_length": 37.5, "epoch": 5.30379746835443, "grad_norm": 1.9608675258557071, "kl": 0.29443359375, "learning_rate": 4.598392476366507e-07, "loss": -0.017030563205480576, "memory(GiB)": 112.53, "reward": 1.8541666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3559, "train_speed(iter/s)": 0.027529 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 191.25, "completions/mean_length": 94.27083587646484, "completions/min_length": 44.0, "epoch": 5.305286671630678, "grad_norm": 0.839983466485712, "kl": 0.2880859375, "learning_rate": 4.596035217709039e-07, "loss": 0.014885438606142998, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833507180214, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3560, "train_speed(iter/s)": 0.027529 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 262.5, "completions/mean_length": 100.27083396911621, "completions/min_length": 44.5, "epoch": 5.306775874906925, "grad_norm": 0.0037258012858156103, "kl": 0.2841796875, "learning_rate": 4.593678049425983e-07, "loss": 0.0002846058923751116, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3561, "train_speed(iter/s)": 0.027529 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 204.75, "completions/mean_length": 95.73958587646484, "completions/min_length": 45.25, "epoch": 5.308265078183172, "grad_norm": 0.003669453669251452, "kl": 0.29833984375, "learning_rate": 4.591320972044686e-07, "loss": 0.0002984951715916395, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3562, "train_speed(iter/s)": 0.027528 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 181.75, "completions/mean_length": 93.98958587646484, "completions/min_length": 45.5, "epoch": 5.309754281459419, "grad_norm": 0.003547183057100703, "kl": 0.29248046875, "learning_rate": 4.5889639860924673e-07, "loss": 0.0002925872977357358, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3563, "train_speed(iter/s)": 0.027528 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 195.75, "completions/mean_length": 96.20833587646484, "completions/min_length": 47.0, "epoch": 5.311243484735666, "grad_norm": 0.004376156146769426, "kl": 0.2822265625, "learning_rate": 4.586607092096629e-07, "loss": 0.00028232758631929755, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3564, "train_speed(iter/s)": 0.027527 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.5, "completions/mean_length": 101.60416984558105, "completions/min_length": 48.0, "epoch": 5.312732688011914, "grad_norm": 0.003895933597256011, "kl": 0.25927734375, "learning_rate": 4.5842502905844557e-07, "loss": 0.0002596033737063408, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3565, "train_speed(iter/s)": 0.027527 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.0, "completions/mean_length": 92.52083587646484, "completions/min_length": 39.75, "epoch": 5.3142218912881605, "grad_norm": 0.00399794705523417, "kl": 0.302734375, "learning_rate": 4.5818935820832013e-07, "loss": 0.000302566506434232, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3566, "train_speed(iter/s)": 0.027524 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.25, "completions/mean_length": 89.44791793823242, "completions/min_length": 37.5, "epoch": 5.315711094564408, "grad_norm": 0.003795259130473236, "kl": 0.29248046875, "learning_rate": 4.57953696712011e-07, "loss": 0.0002924509171862155, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3567, "train_speed(iter/s)": 0.027521 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.75, "completions/mean_length": 90.80208587646484, "completions/min_length": 39.5, "epoch": 5.317200297840655, "grad_norm": 2.4212688018358484, "kl": 0.3662109375, "learning_rate": 4.577180446222397e-07, "loss": -0.010378686711192131, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.0876726396381855, "rewards/CineAccuracyORM/mean": 0.7395833507180214, "rewards/CineAccuracyORM/std": 0.3192720115184784, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3568, "train_speed(iter/s)": 0.027519 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.25, "completions/mean_length": 93.37500381469727, "completions/min_length": 50.5, "epoch": 5.318689501116903, "grad_norm": 0.003972730710398689, "kl": 0.2783203125, "learning_rate": 4.574824019917261e-07, "loss": 0.00027840567054226995, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3569, "train_speed(iter/s)": 0.027521 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.75, "completions/mean_length": 88.15625190734863, "completions/min_length": 35.25, "epoch": 5.320178704393149, "grad_norm": 0.0037163575547441464, "kl": 0.30810546875, "learning_rate": 4.5724676887318797e-07, "loss": 0.0003079765010625124, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3570, "train_speed(iter/s)": 0.027522 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 250.5, "completions/mean_length": 101.97916984558105, "completions/min_length": 44.25, "epoch": 5.321667907669397, "grad_norm": 1.5924189438724274, "kl": 0.28564453125, "learning_rate": 4.570111453193406e-07, "loss": -0.00903470627963543, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3571, "train_speed(iter/s)": 0.027522 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.25, "completions/mean_length": 86.44791793823242, "completions/min_length": 42.0, "epoch": 5.323157110945644, "grad_norm": 0.0038836711717398897, "kl": 0.30078125, "learning_rate": 4.5677553138289757e-07, "loss": 0.00030110470834188163, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3572, "train_speed(iter/s)": 0.027525 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.0, "completions/mean_length": 94.79166984558105, "completions/min_length": 44.5, "epoch": 5.324646314221892, "grad_norm": 0.005575711271175676, "kl": 0.30419921875, "learning_rate": 4.565399271165699e-07, "loss": 0.00030397885711863637, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3573, "train_speed(iter/s)": 0.027521 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 185.25, "completions/mean_length": 92.75000190734863, "completions/min_length": 41.0, "epoch": 5.326135517498138, "grad_norm": 0.003926099492578105, "kl": 0.2978515625, "learning_rate": 4.5630433257306657e-07, "loss": 0.00029834400629624724, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3574, "train_speed(iter/s)": 0.02752 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 235.0, "completions/mean_length": 94.20833587646484, "completions/min_length": 39.0, "epoch": 5.327624720774386, "grad_norm": 0.004513078397259466, "kl": 0.28564453125, "learning_rate": 4.560687478050947e-07, "loss": 0.0002860402746591717, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3575, "train_speed(iter/s)": 0.027521 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.0, "completions/mean_length": 91.86458587646484, "completions/min_length": 45.5, "epoch": 5.329113924050633, "grad_norm": 0.004004693104365451, "kl": 0.29931640625, "learning_rate": 4.5583317286535876e-07, "loss": 0.0002991912770085037, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3576, "train_speed(iter/s)": 0.027524 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 229.5, "completions/mean_length": 101.66666984558105, "completions/min_length": 42.5, "epoch": 5.33060312732688, "grad_norm": 0.0038823693707457723, "kl": 0.27392578125, "learning_rate": 4.555976078065613e-07, "loss": 0.00027397595113143325, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3577, "train_speed(iter/s)": 0.02752 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.0, "completions/mean_length": 97.42708396911621, "completions/min_length": 48.0, "epoch": 5.332092330603127, "grad_norm": 0.004112648334544677, "kl": 0.29736328125, "learning_rate": 4.5536205268140286e-07, "loss": 0.0002973565424326807, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3578, "train_speed(iter/s)": 0.02752 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 208.25, "completions/mean_length": 98.02083587646484, "completions/min_length": 41.5, "epoch": 5.333581533879374, "grad_norm": 0.003830896048250775, "kl": 0.2998046875, "learning_rate": 4.5512650754258104e-07, "loss": 0.0002997901465278119, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3579, "train_speed(iter/s)": 0.027517 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.5, "completions/mean_length": 95.37500381469727, "completions/min_length": 37.0, "epoch": 5.335070737155622, "grad_norm": 1.8491135167848929, "kl": 0.294921875, "learning_rate": 4.5489097244279203e-07, "loss": -0.015815941616892815, "memory(GiB)": 112.53, "reward": 1.5104166865348816, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.5104166716337204, "rewards/CineAccuracyORM/std": 0.4749870151281357, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3580, "train_speed(iter/s)": 0.027517 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 238.25, "completions/mean_length": 97.29166984558105, "completions/min_length": 42.0, "epoch": 5.336559940431869, "grad_norm": 0.0037387360544123005, "kl": 0.269775390625, "learning_rate": 4.54655447434729e-07, "loss": 0.0002698729804251343, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3581, "train_speed(iter/s)": 0.027517 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 201.25, "completions/mean_length": 94.22916984558105, "completions/min_length": 48.5, "epoch": 5.338049143708116, "grad_norm": 0.004469672580131935, "kl": 0.28466796875, "learning_rate": 4.5441993257108357e-07, "loss": 0.00028441016911529005, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3582, "train_speed(iter/s)": 0.027518 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.5, "completions/mean_length": 89.08333587646484, "completions/min_length": 41.75, "epoch": 5.339538346984363, "grad_norm": 0.05735109790387691, "kl": 0.3173828125, "learning_rate": 4.5418442790454476e-07, "loss": 0.0003180889179930091, "memory(GiB)": 112.53, "reward": 1.416666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.416666679084301, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3583, "train_speed(iter/s)": 0.027518 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.25, "completions/mean_length": 98.19791793823242, "completions/min_length": 38.75, "epoch": 5.341027550260611, "grad_norm": 1.1040564111712527, "kl": 0.2783203125, "learning_rate": 4.539489334877992e-07, "loss": 0.020817823708057404, "memory(GiB)": 112.53, "reward": 1.8958333432674408, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.12590253353118896, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3584, "train_speed(iter/s)": 0.027516 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.25, "completions/mean_length": 91.64583587646484, "completions/min_length": 50.5, "epoch": 5.342516753536858, "grad_norm": 0.0038518135843623414, "kl": 0.28857421875, "learning_rate": 4.537134493735316e-07, "loss": 0.00028798350831493735, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3585, "train_speed(iter/s)": 0.027519 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.5, "completions/mean_length": 97.64583587646484, "completions/min_length": 41.25, "epoch": 5.344005956813105, "grad_norm": 0.7580664667329622, "kl": 0.304931640625, "learning_rate": 4.534779756144238e-07, "loss": 0.004550830461084843, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3586, "train_speed(iter/s)": 0.027519 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.0, "completions/mean_length": 104.82291984558105, "completions/min_length": 48.5, "epoch": 5.345495160089352, "grad_norm": 0.004829596803527568, "kl": 0.27587890625, "learning_rate": 4.5324251226315583e-07, "loss": 0.0002758100163191557, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3587, "train_speed(iter/s)": 0.027519 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 229.75, "completions/mean_length": 96.91666984558105, "completions/min_length": 52.0, "epoch": 5.3469843633656, "grad_norm": 1.5248963102547468, "kl": 0.27099609375, "learning_rate": 4.530070593724053e-07, "loss": 0.000271078257355839, "memory(GiB)": 112.53, "reward": 1.5729166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5729166716337204, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3588, "train_speed(iter/s)": 0.027517 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.5, "completions/mean_length": 97.12500190734863, "completions/min_length": 35.5, "epoch": 5.348473566641847, "grad_norm": 1.1974278724904743, "kl": 0.28955078125, "learning_rate": 4.527716169948471e-07, "loss": -0.00298529164865613, "memory(GiB)": 112.53, "reward": 1.875, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.1276884824037552, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3589, "train_speed(iter/s)": 0.027516 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.0, "completions/mean_length": 96.86458587646484, "completions/min_length": 34.0, "epoch": 5.349962769918093, "grad_norm": 0.003791820332016741, "kl": 0.27685546875, "learning_rate": 4.525361851831545e-07, "loss": 0.00027718141791410744, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3590, "train_speed(iter/s)": 0.027513 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.5, "completions/mean_length": 95.93750190734863, "completions/min_length": 49.5, "epoch": 5.351451973194341, "grad_norm": 2.0135797086713585, "kl": 0.2900390625, "learning_rate": 4.523007639899975e-07, "loss": 0.04172074794769287, "memory(GiB)": 112.53, "reward": 1.7187500596046448, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.7187500298023224, "rewards/CineAccuracyORM/std": 0.4177052788436413, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3591, "train_speed(iter/s)": 0.027513 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 286.5, "completions/mean_length": 112.63542175292969, "completions/min_length": 41.75, "epoch": 5.352941176470588, "grad_norm": 2.2135426211735076, "kl": 0.26513671875, "learning_rate": 4.5206535346804444e-07, "loss": 0.021927548572421074, "memory(GiB)": 112.53, "reward": 1.5000000298023224, "reward_std": 0.11116771958768368, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.45438022166490555, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3592, "train_speed(iter/s)": 0.02751 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 235.0, "completions/mean_length": 100.33333396911621, "completions/min_length": 42.25, "epoch": 5.3544303797468356, "grad_norm": 0.004367815371079986, "kl": 0.2978515625, "learning_rate": 4.5182995366996106e-07, "loss": 0.00029774318682029843, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3593, "train_speed(iter/s)": 0.027509 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 184.25, "completions/mean_length": 97.51041984558105, "completions/min_length": 46.25, "epoch": 5.355919583023082, "grad_norm": 0.0035638026144168204, "kl": 0.266845703125, "learning_rate": 4.5159456464841047e-07, "loss": 0.00026670339866541326, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3594, "train_speed(iter/s)": 0.027509 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.25, "completions/mean_length": 102.62500190734863, "completions/min_length": 48.25, "epoch": 5.35740878629933, "grad_norm": 1.3743774508045234, "kl": 0.27587890625, "learning_rate": 4.513591864560537e-07, "loss": 0.009599295444786549, "memory(GiB)": 112.53, "reward": 1.8854166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.12724433839321136, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3595, "train_speed(iter/s)": 0.027512 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.25, "completions/mean_length": 107.15625190734863, "completions/min_length": 46.0, "epoch": 5.358897989575577, "grad_norm": 0.0041410944596505924, "kl": 0.2724609375, "learning_rate": 4.511238191455491e-07, "loss": 0.00027246944955550134, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3596, "train_speed(iter/s)": 0.027513 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.75, "completions/mean_length": 95.22916984558105, "completions/min_length": 49.0, "epoch": 5.3603871928518245, "grad_norm": 0.006402688153248567, "kl": 0.2841796875, "learning_rate": 4.508884627695528e-07, "loss": 0.00028389832004904747, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3597, "train_speed(iter/s)": 0.027516 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 247.25, "completions/mean_length": 101.04166984558105, "completions/min_length": 39.0, "epoch": 5.361876396128071, "grad_norm": 0.00436324253026374, "kl": 0.28759765625, "learning_rate": 4.5065311738071855e-07, "loss": 0.00028755070525221527, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3598, "train_speed(iter/s)": 0.027515 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.25, "completions/mean_length": 94.63541793823242, "completions/min_length": 48.75, "epoch": 5.363365599404319, "grad_norm": 0.0037811041872744732, "kl": 0.291015625, "learning_rate": 4.5041778303169707e-07, "loss": 0.0002911626361310482, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3599, "train_speed(iter/s)": 0.027517 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 199.0, "completions/mean_length": 97.21875190734863, "completions/min_length": 43.25, "epoch": 5.364854802680566, "grad_norm": 0.005574640816677261, "kl": 0.292724609375, "learning_rate": 4.501824597751374e-07, "loss": 0.00029233776149339974, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3600, "train_speed(iter/s)": 0.027515 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.0, "completions/mean_length": 96.07291984558105, "completions/min_length": 45.75, "epoch": 5.3663440059568135, "grad_norm": 0.9995304979249654, "kl": 0.275390625, "learning_rate": 4.4994714766368547e-07, "loss": -0.0020468123257160187, "memory(GiB)": 112.53, "reward": 1.7604166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3601, "train_speed(iter/s)": 0.027514 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.0, "completions/mean_length": 101.43750190734863, "completions/min_length": 49.25, "epoch": 5.36783320923306, "grad_norm": 0.003384409555921496, "kl": 0.2783203125, "learning_rate": 4.4971184674998515e-07, "loss": 0.0002780285431072116, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3602, "train_speed(iter/s)": 0.027515 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 184.75, "completions/mean_length": 90.97916984558105, "completions/min_length": 36.75, "epoch": 5.369322412509307, "grad_norm": 0.005122945054545735, "kl": 0.28955078125, "learning_rate": 4.494765570866775e-07, "loss": 0.00028960249619558454, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3603, "train_speed(iter/s)": 0.027516 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 243.5, "completions/mean_length": 103.73958587646484, "completions/min_length": 43.75, "epoch": 5.370811615785555, "grad_norm": 0.793892785409186, "kl": 0.287353515625, "learning_rate": 4.492412787264011e-07, "loss": 0.0015017190016806126, "memory(GiB)": 112.53, "reward": 1.6875000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000074505806, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3604, "train_speed(iter/s)": 0.027515 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.0, "completions/mean_length": 92.58333587646484, "completions/min_length": 41.0, "epoch": 5.372300819061802, "grad_norm": 0.004846554545274218, "kl": 0.27783203125, "learning_rate": 4.490060117217924e-07, "loss": 0.0002780580543912947, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3605, "train_speed(iter/s)": 0.027516 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 202.75, "completions/mean_length": 94.25, "completions/min_length": 42.75, "epoch": 5.373790022338049, "grad_norm": 0.004559930772653069, "kl": 0.3037109375, "learning_rate": 4.4877075612548465e-07, "loss": 0.00030332320602610707, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3606, "train_speed(iter/s)": 0.027515 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.5, "completions/mean_length": 87.60416793823242, "completions/min_length": 40.75, "epoch": 5.375279225614296, "grad_norm": 0.003909952524659004, "kl": 0.32421875, "learning_rate": 4.48535511990109e-07, "loss": 0.00032393261790275574, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3607, "train_speed(iter/s)": 0.027515 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.0, "completions/mean_length": 92.54166793823242, "completions/min_length": 41.25, "epoch": 5.376768428890544, "grad_norm": 0.0046162555500295485, "kl": 0.2958984375, "learning_rate": 4.48300279368294e-07, "loss": 0.00029594992520287633, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3608, "train_speed(iter/s)": 0.027514 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 185.25, "completions/mean_length": 92.63541793823242, "completions/min_length": 45.5, "epoch": 5.3782576321667905, "grad_norm": 0.005301254900080063, "kl": 0.30224609375, "learning_rate": 4.4806505831266546e-07, "loss": 0.00030244392110034823, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3609, "train_speed(iter/s)": 0.027516 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.75, "completions/mean_length": 91.23958587646484, "completions/min_length": 42.75, "epoch": 5.379746835443038, "grad_norm": 0.004352514695388936, "kl": 0.28662109375, "learning_rate": 4.47829848875847e-07, "loss": 0.0002862266846932471, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3610, "train_speed(iter/s)": 0.027519 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.5, "completions/mean_length": 103.77083587646484, "completions/min_length": 53.25, "epoch": 5.381236038719285, "grad_norm": 0.0041224893373207686, "kl": 0.263916015625, "learning_rate": 4.4759465111045874e-07, "loss": 0.00026351690758019686, "memory(GiB)": 112.53, "reward": 1.416666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.416666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3611, "train_speed(iter/s)": 0.027521 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.75, "completions/mean_length": 109.89583587646484, "completions/min_length": 34.75, "epoch": 5.382725241995533, "grad_norm": 0.004617897699639942, "kl": 0.26220703125, "learning_rate": 4.473594650691192e-07, "loss": 0.00026170507771894336, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3612, "train_speed(iter/s)": 0.027523 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 250.0, "completions/mean_length": 105.32291984558105, "completions/min_length": 53.5, "epoch": 5.3842144452717795, "grad_norm": 0.5637481423233102, "kl": 0.26904296875, "learning_rate": 4.4712429080444386e-07, "loss": 0.004050006158649921, "memory(GiB)": 112.53, "reward": 1.729166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.729166679084301, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3613, "train_speed(iter/s)": 0.027525 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.25, "completions/mean_length": 101.75000381469727, "completions/min_length": 45.5, "epoch": 5.385703648548027, "grad_norm": 0.004070906671819067, "kl": 0.27294921875, "learning_rate": 4.4688912836904533e-07, "loss": 0.0002723044017329812, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3614, "train_speed(iter/s)": 0.027528 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.75, "completions/mean_length": 93.07291984558105, "completions/min_length": 34.5, "epoch": 5.387192851824274, "grad_norm": 0.004091067307651792, "kl": 0.31103515625, "learning_rate": 4.4665397781553425e-07, "loss": 0.0003108021919615567, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3615, "train_speed(iter/s)": 0.027525 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.25, "completions/mean_length": 93.20833396911621, "completions/min_length": 40.0, "epoch": 5.388682055100521, "grad_norm": 0.004029332318027082, "kl": 0.2890625, "learning_rate": 4.4641883919651765e-07, "loss": 0.0002889822644647211, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3616, "train_speed(iter/s)": 0.027527 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 260.0, "completions/mean_length": 105.59375190734863, "completions/min_length": 44.5, "epoch": 5.3901712583767685, "grad_norm": 0.004641605911067331, "kl": 0.2802734375, "learning_rate": 4.4618371256460057e-07, "loss": 0.00027982762549072504, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3617, "train_speed(iter/s)": 0.027523 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.75, "completions/mean_length": 96.80208778381348, "completions/min_length": 45.0, "epoch": 5.391660461653015, "grad_norm": 2.3535650152399556, "kl": 0.29638671875, "learning_rate": 4.459485979723855e-07, "loss": -0.006355087738484144, "memory(GiB)": 112.53, "reward": 1.6458334028720856, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6458333507180214, "rewards/CineAccuracyORM/std": 0.43174003064632416, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3618, "train_speed(iter/s)": 0.027524 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.0, "completions/mean_length": 97.07291984558105, "completions/min_length": 43.0, "epoch": 5.393149664929263, "grad_norm": 0.003707558933996243, "kl": 0.26513671875, "learning_rate": 4.457134954724715e-07, "loss": 0.0002655880234669894, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3619, "train_speed(iter/s)": 0.027526 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 225.75, "completions/mean_length": 100.8437557220459, "completions/min_length": 41.5, "epoch": 5.39463886820551, "grad_norm": 0.004400106277958117, "kl": 0.274658203125, "learning_rate": 4.454784051174556e-07, "loss": 0.00027464632876217365, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3620, "train_speed(iter/s)": 0.027524 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 236.5, "completions/mean_length": 94.41666984558105, "completions/min_length": 45.0, "epoch": 5.396128071481757, "grad_norm": 0.004655821268149899, "kl": 0.29736328125, "learning_rate": 4.452433269599317e-07, "loss": 0.000297226884867996, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3621, "train_speed(iter/s)": 0.027521 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 286.25, "completions/mean_length": 104.11458587646484, "completions/min_length": 48.5, "epoch": 5.397617274758004, "grad_norm": 0.004463545959194882, "kl": 0.280517578125, "learning_rate": 4.4500826105249135e-07, "loss": 0.00028018519515171647, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3622, "train_speed(iter/s)": 0.027517 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.25, "completions/mean_length": 93.97916984558105, "completions/min_length": 36.25, "epoch": 5.399106478034252, "grad_norm": 0.003961598015511996, "kl": 0.29052734375, "learning_rate": 4.447732074477232e-07, "loss": 0.00029041210655122995, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3623, "train_speed(iter/s)": 0.027518 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 235.5, "completions/mean_length": 100.30208396911621, "completions/min_length": 44.25, "epoch": 5.400595681310499, "grad_norm": 0.004490685858123083, "kl": 0.27685546875, "learning_rate": 4.445381661982128e-07, "loss": 0.00027652591234073043, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3624, "train_speed(iter/s)": 0.027521 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 194.0, "completions/mean_length": 99.07291793823242, "completions/min_length": 45.75, "epoch": 5.402084884586746, "grad_norm": 0.004130539715891134, "kl": 0.28662109375, "learning_rate": 4.443031373565435e-07, "loss": 0.00028658381779678166, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3625, "train_speed(iter/s)": 0.027522 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.5, "completions/mean_length": 94.63541984558105, "completions/min_length": 43.25, "epoch": 5.403574087862993, "grad_norm": 2.144717670705729, "kl": 0.29736328125, "learning_rate": 4.4406812097529543e-07, "loss": -0.01279296725988388, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3626, "train_speed(iter/s)": 0.02752 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 184.0, "completions/mean_length": 98.61458587646484, "completions/min_length": 43.25, "epoch": 5.405063291139241, "grad_norm": 0.004648966102329185, "kl": 0.28271484375, "learning_rate": 4.438331171070463e-07, "loss": 0.0002827039861585945, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3627, "train_speed(iter/s)": 0.027516 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.0, "completions/mean_length": 91.14583778381348, "completions/min_length": 46.5, "epoch": 5.406552494415488, "grad_norm": 1.7400432147449665, "kl": 0.26806640625, "learning_rate": 4.435981258043707e-07, "loss": 0.00026771979173645377, "memory(GiB)": 112.53, "reward": 1.6250000596046448, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.6250000223517418, "rewards/CineAccuracyORM/std": 0.45633094012737274, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3628, "train_speed(iter/s)": 0.027519 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 233.75, "completions/mean_length": 96.96875190734863, "completions/min_length": 41.5, "epoch": 5.4080416976917345, "grad_norm": 0.003775405857471616, "kl": 0.28369140625, "learning_rate": 4.433631471198405e-07, "loss": 0.0002830030571203679, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3629, "train_speed(iter/s)": 0.027519 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.75, "completions/mean_length": 84.23958587646484, "completions/min_length": 38.25, "epoch": 5.409530900967982, "grad_norm": 0.0040564202394602145, "kl": 0.31982421875, "learning_rate": 4.431281811060251e-07, "loss": 0.0003197806072421372, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3630, "train_speed(iter/s)": 0.02752 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.75, "completions/mean_length": 95.57291984558105, "completions/min_length": 44.0, "epoch": 5.411020104244229, "grad_norm": 0.01413638626584592, "kl": 0.26953125, "learning_rate": 4.428932278154903e-07, "loss": 0.0002700659679248929, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3631, "train_speed(iter/s)": 0.02752 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.25, "completions/mean_length": 98.65625190734863, "completions/min_length": 39.5, "epoch": 5.412509307520477, "grad_norm": 0.004160184570968142, "kl": 0.2685546875, "learning_rate": 4.4265828730079977e-07, "loss": 0.00026882754173129797, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3632, "train_speed(iter/s)": 0.027522 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 214.0, "completions/mean_length": 92.17708587646484, "completions/min_length": 39.0, "epoch": 5.4139985107967235, "grad_norm": 0.003820945372010965, "kl": 0.294921875, "learning_rate": 4.424233596145141e-07, "loss": 0.00029493391048163176, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3633, "train_speed(iter/s)": 0.02752 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 285.25, "completions/mean_length": 111.29166984558105, "completions/min_length": 44.75, "epoch": 5.415487714072971, "grad_norm": 0.13817292915068677, "kl": 0.31201171875, "learning_rate": 4.4218844480919084e-07, "loss": 0.0003114830469712615, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3634, "train_speed(iter/s)": 0.027518 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.5, "completions/mean_length": 107.57291984558105, "completions/min_length": 49.0, "epoch": 5.416976917349218, "grad_norm": 0.004191140635782428, "kl": 0.2744140625, "learning_rate": 4.419535429373848e-07, "loss": 0.00027419213438406587, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3635, "train_speed(iter/s)": 0.027521 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.0, "completions/mean_length": 99.40625381469727, "completions/min_length": 37.0, "epoch": 5.418466120625466, "grad_norm": 1.8304689561877525, "kl": 0.2890625, "learning_rate": 4.4171865405164815e-07, "loss": 0.02663971111178398, "memory(GiB)": 112.53, "reward": 1.875, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.2080918326973915, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3636, "train_speed(iter/s)": 0.027521 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.25, "completions/mean_length": 85.65625190734863, "completions/min_length": 34.75, "epoch": 5.419955323901712, "grad_norm": 0.7355696711217944, "kl": 0.28564453125, "learning_rate": 4.414837782045295e-07, "loss": 0.0020783538930118084, "memory(GiB)": 112.53, "reward": 1.760416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166865348816, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3637, "train_speed(iter/s)": 0.027522 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 252.5, "completions/mean_length": 94.08333396911621, "completions/min_length": 45.5, "epoch": 5.42144452717796, "grad_norm": 0.003640169757087333, "kl": 0.28759765625, "learning_rate": 4.4124891544857514e-07, "loss": 0.00028701251721940935, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3638, "train_speed(iter/s)": 0.02752 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.75, "completions/mean_length": 88.45833587646484, "completions/min_length": 42.5, "epoch": 5.422933730454207, "grad_norm": 0.004206068814196168, "kl": 0.29541015625, "learning_rate": 4.4101406583632823e-07, "loss": 0.0002947358298115432, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3639, "train_speed(iter/s)": 0.027521 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.0, "completions/mean_length": 95.00000190734863, "completions/min_length": 33.25, "epoch": 5.424422933730455, "grad_norm": 1.8849939797849098, "kl": 0.29296875, "learning_rate": 4.4077922942032884e-07, "loss": 0.0026807559188455343, "memory(GiB)": 112.53, "reward": 1.7916666865348816, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.24807433784008026, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3640, "train_speed(iter/s)": 0.027523 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.25, "completions/mean_length": 99.35416984558105, "completions/min_length": 44.25, "epoch": 5.425912137006701, "grad_norm": 0.004470614786066513, "kl": 0.27001953125, "learning_rate": 4.4054440625311446e-07, "loss": 0.00027026611496694386, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3641, "train_speed(iter/s)": 0.027522 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.0, "completions/mean_length": 100.79166793823242, "completions/min_length": 44.0, "epoch": 5.427401340282948, "grad_norm": 0.0041300568082846085, "kl": 0.265869140625, "learning_rate": 4.4030959638721917e-07, "loss": 0.00026563898427411914, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3642, "train_speed(iter/s)": 0.027521 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.75, "completions/mean_length": 90.85416984558105, "completions/min_length": 40.25, "epoch": 5.428890543559196, "grad_norm": 0.6710582931182679, "kl": 0.273681640625, "learning_rate": 4.400747998751746e-07, "loss": -0.014995863661170006, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3643, "train_speed(iter/s)": 0.02752 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.0, "completions/mean_length": 94.36458587646484, "completions/min_length": 44.75, "epoch": 5.430379746835443, "grad_norm": 1.4444097928309754, "kl": 0.30126953125, "learning_rate": 4.398400167695087e-07, "loss": -0.003342829179018736, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3644, "train_speed(iter/s)": 0.027522 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 231.0, "completions/mean_length": 90.95833587646484, "completions/min_length": 42.75, "epoch": 5.43186895011169, "grad_norm": 1.0117183662950995, "kl": 0.301025390625, "learning_rate": 4.396052471227469e-07, "loss": -0.020056797191500664, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3645, "train_speed(iter/s)": 0.027524 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 261.0, "completions/mean_length": 91.97916793823242, "completions/min_length": 46.25, "epoch": 5.433358153387937, "grad_norm": 1.1937527924567954, "kl": 0.30322265625, "learning_rate": 4.3937049098741174e-07, "loss": 0.030723638832569122, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3646, "train_speed(iter/s)": 0.027523 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.25, "completions/mean_length": 97.79166793823242, "completions/min_length": 41.75, "epoch": 5.434847356664185, "grad_norm": 0.009436927277686816, "kl": 0.2802734375, "learning_rate": 4.391357484160223e-07, "loss": 0.0002800612710416317, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3647, "train_speed(iter/s)": 0.027519 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.5, "completions/mean_length": 99.57291984558105, "completions/min_length": 43.75, "epoch": 5.436336559940432, "grad_norm": 0.003779161578847272, "kl": 0.30029296875, "learning_rate": 4.3890101946109513e-07, "loss": 0.0003000800497829914, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3648, "train_speed(iter/s)": 0.027518 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.25, "completions/mean_length": 100.38541984558105, "completions/min_length": 42.75, "epoch": 5.437825763216679, "grad_norm": 0.003916242273267361, "kl": 0.2763671875, "learning_rate": 4.386663041751431e-07, "loss": 0.0002765494864434004, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3649, "train_speed(iter/s)": 0.027515 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 282.25, "completions/mean_length": 101.67708587646484, "completions/min_length": 38.75, "epoch": 5.439314966492926, "grad_norm": 0.004276695808849233, "kl": 0.280029296875, "learning_rate": 4.3843160261067653e-07, "loss": 0.0002799617068376392, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3650, "train_speed(iter/s)": 0.027515 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 201.0, "completions/mean_length": 89.86458778381348, "completions/min_length": 36.5, "epoch": 5.440804169769174, "grad_norm": 0.004734760908193013, "kl": 0.28564453125, "learning_rate": 4.3819691482020267e-07, "loss": 0.00028531986754387617, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3651, "train_speed(iter/s)": 0.027515 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.0, "completions/mean_length": 81.84375381469727, "completions/min_length": 37.75, "epoch": 5.442293373045421, "grad_norm": 0.004986645644960035, "kl": 0.32666015625, "learning_rate": 4.3796224085622517e-07, "loss": 0.0003262307436671108, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3652, "train_speed(iter/s)": 0.027518 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 229.75, "completions/mean_length": 95.36458587646484, "completions/min_length": 42.75, "epoch": 5.443782576321668, "grad_norm": 2.769432261808637, "kl": 0.2861328125, "learning_rate": 4.3772758077124526e-07, "loss": 0.00010189841850660741, "memory(GiB)": 112.53, "reward": 1.5312500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.5312500149011612, "rewards/CineAccuracyORM/std": 0.4884019047021866, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3653, "train_speed(iter/s)": 0.02752 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 199.0, "completions/mean_length": 92.01041793823242, "completions/min_length": 43.5, "epoch": 5.445271779597915, "grad_norm": 0.004199940008026023, "kl": 0.287353515625, "learning_rate": 4.374929346177606e-07, "loss": 0.0002874974743463099, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3654, "train_speed(iter/s)": 0.027521 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.0, "completions/mean_length": 88.41666984558105, "completions/min_length": 34.75, "epoch": 5.446760982874162, "grad_norm": 0.0035933790737598397, "kl": 0.29931640625, "learning_rate": 4.372583024482659e-07, "loss": 0.00029864581301808357, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3655, "train_speed(iter/s)": 0.027522 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 257.25, "completions/mean_length": 104.31250381469727, "completions/min_length": 45.5, "epoch": 5.44825018615041, "grad_norm": 1.179949229443073, "kl": 0.7294921875, "learning_rate": 4.37023684315253e-07, "loss": 0.002706043189391494, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.0876726396381855, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.19888615608215332, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3656, "train_speed(iter/s)": 0.027518 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.75, "completions/mean_length": 91.89583587646484, "completions/min_length": 41.75, "epoch": 5.449739389426656, "grad_norm": 1.886526836199586, "kl": 0.30078125, "learning_rate": 4.3678908027120987e-07, "loss": 0.01263569574803114, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.48803938925266266, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3657, "train_speed(iter/s)": 0.027517 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 185.0, "completions/mean_length": 85.83333396911621, "completions/min_length": 40.5, "epoch": 5.451228592702904, "grad_norm": 0.0035255579801346908, "kl": 0.328125, "learning_rate": 4.3655449036862213e-07, "loss": 0.00032808221294544637, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3658, "train_speed(iter/s)": 0.027515 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.75, "completions/mean_length": 92.72916984558105, "completions/min_length": 40.5, "epoch": 5.452717795979151, "grad_norm": 0.005032439731032273, "kl": 0.30810546875, "learning_rate": 4.3631991465997163e-07, "loss": 0.00030815438367426395, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3659, "train_speed(iter/s)": 0.027514 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.75, "completions/mean_length": 88.57291984558105, "completions/min_length": 41.5, "epoch": 5.4542069992553985, "grad_norm": 0.0037170399106493312, "kl": 0.31982421875, "learning_rate": 4.360853531977374e-07, "loss": 0.0003206543333362788, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3660, "train_speed(iter/s)": 0.027511 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 199.25, "completions/mean_length": 93.52083587646484, "completions/min_length": 41.0, "epoch": 5.455696202531645, "grad_norm": 0.9783834111170335, "kl": 0.29736328125, "learning_rate": 4.358508060343956e-07, "loss": -0.029036445543169975, "memory(GiB)": 112.53, "reward": 1.9270833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9270833432674408, "rewards/CineAccuracyORM/std": 0.11607640981674194, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3661, "train_speed(iter/s)": 0.027514 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.25, "completions/mean_length": 94.88542175292969, "completions/min_length": 42.5, "epoch": 5.457185405807893, "grad_norm": 0.003479218697569794, "kl": 0.2802734375, "learning_rate": 4.356162732224181e-07, "loss": 0.0002805331605486572, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3662, "train_speed(iter/s)": 0.027514 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.75, "completions/mean_length": 88.98958587646484, "completions/min_length": 36.75, "epoch": 5.45867460908414, "grad_norm": 0.0032289438701199953, "kl": 0.296875, "learning_rate": 4.3538175481427483e-07, "loss": 0.0002963457372970879, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3663, "train_speed(iter/s)": 0.027512 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 204.25, "completions/mean_length": 91.30208587646484, "completions/min_length": 44.5, "epoch": 5.4601638123603875, "grad_norm": 0.004130614818899036, "kl": 0.294921875, "learning_rate": 4.3514725086243143e-07, "loss": 0.00029497325886040926, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3664, "train_speed(iter/s)": 0.027512 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 253.25, "completions/mean_length": 105.07291984558105, "completions/min_length": 37.75, "epoch": 5.461653015636634, "grad_norm": 0.0034719190168262856, "kl": 0.25634765625, "learning_rate": 4.3491276141935096e-07, "loss": 0.0002569030912127346, "memory(GiB)": 112.53, "reward": 1.5000000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000074505806, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3665, "train_speed(iter/s)": 0.027513 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 260.5, "completions/mean_length": 106.36458587646484, "completions/min_length": 38.75, "epoch": 5.463142218912882, "grad_norm": 0.0036517994548519008, "kl": 0.27099609375, "learning_rate": 4.346782865374932e-07, "loss": 0.000270798453129828, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3666, "train_speed(iter/s)": 0.027509 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.25, "completions/mean_length": 100.18750190734863, "completions/min_length": 42.0, "epoch": 5.464631422189129, "grad_norm": 0.004453250394699515, "kl": 0.279541015625, "learning_rate": 4.344438262693143e-07, "loss": 0.0002797330089379102, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3667, "train_speed(iter/s)": 0.027509 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.5, "completions/mean_length": 98.64583396911621, "completions/min_length": 47.75, "epoch": 5.466120625465376, "grad_norm": 0.003382992909030403, "kl": 0.272216796875, "learning_rate": 4.342093806672678e-07, "loss": 0.00027213251451030374, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3668, "train_speed(iter/s)": 0.027511 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.0, "completions/mean_length": 98.13541984558105, "completions/min_length": 33.75, "epoch": 5.467609828741623, "grad_norm": 0.004328403799612732, "kl": 0.28369140625, "learning_rate": 4.339749497838029e-07, "loss": 0.00028323163860477507, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3669, "train_speed(iter/s)": 0.027514 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.0, "completions/mean_length": 96.08333587646484, "completions/min_length": 39.0, "epoch": 5.46909903201787, "grad_norm": 0.003834051300918189, "kl": 0.27099609375, "learning_rate": 4.3374053367136646e-07, "loss": 0.00027082356973551214, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3670, "train_speed(iter/s)": 0.027516 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 194.75, "completions/mean_length": 93.46875190734863, "completions/min_length": 48.75, "epoch": 5.470588235294118, "grad_norm": 0.003398815348117354, "kl": 0.28662109375, "learning_rate": 4.3350613238240183e-07, "loss": 0.0002866558206733316, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3671, "train_speed(iter/s)": 0.027515 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 311.75, "completions/mean_length": 100.35416793823242, "completions/min_length": 40.5, "epoch": 5.4720774385703645, "grad_norm": 0.014128863724619523, "kl": 0.27783203125, "learning_rate": 4.332717459693488e-07, "loss": 0.0002775838365778327, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3672, "train_speed(iter/s)": 0.027516 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.5, "completions/mean_length": 99.20833587646484, "completions/min_length": 39.25, "epoch": 5.473566641846612, "grad_norm": 0.04595891555951996, "kl": 0.259765625, "learning_rate": 4.330373744846441e-07, "loss": 0.000259557826211676, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3673, "train_speed(iter/s)": 0.027516 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 257.0, "completions/mean_length": 99.02083587646484, "completions/min_length": 39.5, "epoch": 5.475055845122859, "grad_norm": 0.00473942902181183, "kl": 0.2783203125, "learning_rate": 4.328030179807207e-07, "loss": 0.00027793049230240285, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3674, "train_speed(iter/s)": 0.027518 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 246.5, "completions/mean_length": 107.16666793823242, "completions/min_length": 47.25, "epoch": 5.476545048399107, "grad_norm": 0.0038507782784779677, "kl": 0.279296875, "learning_rate": 4.3256867651000857e-07, "loss": 0.00027937861159443855, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3675, "train_speed(iter/s)": 0.027514 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 258.0, "completions/mean_length": 98.69791984558105, "completions/min_length": 42.0, "epoch": 5.4780342516753535, "grad_norm": 0.004821259456787648, "kl": 0.29736328125, "learning_rate": 4.3233435012493456e-07, "loss": 0.0002975879469886422, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3676, "train_speed(iter/s)": 0.027514 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 254.75, "completions/mean_length": 101.78125190734863, "completions/min_length": 45.5, "epoch": 5.479523454951601, "grad_norm": 0.8592191898573452, "kl": 0.2900390625, "learning_rate": 4.3210003887792137e-07, "loss": -0.00020247563952580094, "memory(GiB)": 112.53, "reward": 1.6562500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500149011612, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3677, "train_speed(iter/s)": 0.027516 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.25, "completions/mean_length": 99.32291984558105, "completions/min_length": 47.5, "epoch": 5.481012658227848, "grad_norm": 0.0033958847542142475, "kl": 0.27197265625, "learning_rate": 4.318657428213891e-07, "loss": 0.0002724596706684679, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3678, "train_speed(iter/s)": 0.027514 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.25, "completions/mean_length": 91.53125190734863, "completions/min_length": 34.5, "epoch": 5.482501861504096, "grad_norm": 0.0031301482317166985, "kl": 0.275390625, "learning_rate": 4.316314620077539e-07, "loss": 0.00027500384021550417, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3679, "train_speed(iter/s)": 0.027516 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 208.0, "completions/mean_length": 98.58333587646484, "completions/min_length": 46.0, "epoch": 5.4839910647803425, "grad_norm": 0.7862039629355075, "kl": 0.27783203125, "learning_rate": 4.313971964894288e-07, "loss": -0.014987657777965069, "memory(GiB)": 112.53, "reward": 1.9479166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.9479166716337204, "rewards/CineAccuracyORM/std": 0.1037127822637558, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3680, "train_speed(iter/s)": 0.027516 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.5, "completions/mean_length": 89.04166793823242, "completions/min_length": 43.75, "epoch": 5.485480268056589, "grad_norm": 0.003277740113594267, "kl": 0.28759765625, "learning_rate": 4.3116294631882373e-07, "loss": 0.00028743466828018427, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3681, "train_speed(iter/s)": 0.027515 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 206.75, "completions/mean_length": 102.72916793823242, "completions/min_length": 43.0, "epoch": 5.486969471332837, "grad_norm": 0.0035519786168258945, "kl": 0.27197265625, "learning_rate": 4.3092871154834413e-07, "loss": 0.0002722200006246567, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3682, "train_speed(iter/s)": 0.027514 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.75, "completions/mean_length": 95.47916984558105, "completions/min_length": 33.75, "epoch": 5.488458674609084, "grad_norm": 1.0023646624411449, "kl": 0.293701171875, "learning_rate": 4.3069449223039316e-07, "loss": 0.008851924911141396, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3683, "train_speed(iter/s)": 0.027512 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 267.25, "completions/mean_length": 95.76042175292969, "completions/min_length": 35.5, "epoch": 5.4899478778853315, "grad_norm": 3.0919382181241724, "kl": 0.29296875, "learning_rate": 4.3046028841736976e-07, "loss": -0.016870111227035522, "memory(GiB)": 112.53, "reward": 1.9583333432674408, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.9583333432674408, "rewards/CineAccuracyORM/std": 0.09517337381839752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3684, "train_speed(iter/s)": 0.027508 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 206.5, "completions/mean_length": 96.97916793823242, "completions/min_length": 38.75, "epoch": 5.491437081161578, "grad_norm": 1.464447537186025, "kl": 0.277099609375, "learning_rate": 4.3022610016166973e-07, "loss": -0.01818224973976612, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.833333358168602, "rewards/CineAccuracyORM/std": 0.2874932996928692, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3685, "train_speed(iter/s)": 0.027505 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.5, "completions/mean_length": 100.56250381469727, "completions/min_length": 41.0, "epoch": 5.492926284437826, "grad_norm": 0.003230939822309959, "kl": 0.27880859375, "learning_rate": 4.2999192751568557e-07, "loss": 0.0002789979916997254, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3686, "train_speed(iter/s)": 0.027505 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.5, "completions/mean_length": 93.35416984558105, "completions/min_length": 39.0, "epoch": 5.494415487714073, "grad_norm": 0.0037913273652923508, "kl": 0.291259765625, "learning_rate": 4.2975777053180563e-07, "loss": 0.00029167821048758924, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3687, "train_speed(iter/s)": 0.027508 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 236.25, "completions/mean_length": 100.84375190734863, "completions/min_length": 47.75, "epoch": 5.49590469099032, "grad_norm": 0.07134244046019021, "kl": 0.33544921875, "learning_rate": 4.2952362926241534e-07, "loss": 0.00033505307510495186, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3688, "train_speed(iter/s)": 0.027508 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 249.5, "completions/mean_length": 104.11458778381348, "completions/min_length": 43.0, "epoch": 5.497393894266567, "grad_norm": 0.003353345696770912, "kl": 0.258544921875, "learning_rate": 4.2928950375989676e-07, "loss": 0.00025792716769501567, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3689, "train_speed(iter/s)": 0.027508 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.25, "completions/mean_length": 98.15625381469727, "completions/min_length": 44.75, "epoch": 5.498883097542815, "grad_norm": 0.003717689843905726, "kl": 0.29443359375, "learning_rate": 4.290553940766276e-07, "loss": 0.0002942974679172039, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3690, "train_speed(iter/s)": 0.027509 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.25, "completions/mean_length": 106.39583587646484, "completions/min_length": 49.5, "epoch": 5.500372300819062, "grad_norm": 0.0035394253572736674, "kl": 0.275390625, "learning_rate": 4.288213002649829e-07, "loss": 0.00027538195718079805, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3691, "train_speed(iter/s)": 0.02751 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.75, "completions/mean_length": 94.78125190734863, "completions/min_length": 47.5, "epoch": 5.501861504095309, "grad_norm": 1.8501684314219087, "kl": 0.266357421875, "learning_rate": 4.2858722237733347e-07, "loss": 0.0024685156531631947, "memory(GiB)": 112.53, "reward": 1.875, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.1276884824037552, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3692, "train_speed(iter/s)": 0.027512 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 276.5, "completions/mean_length": 104.03125381469727, "completions/min_length": 46.0, "epoch": 5.503350707371556, "grad_norm": 0.003678980008359738, "kl": 0.26611328125, "learning_rate": 4.283531604660471e-07, "loss": 0.0002654974232427776, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3693, "train_speed(iter/s)": 0.027514 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 244.25, "completions/mean_length": 108.03125190734863, "completions/min_length": 39.75, "epoch": 5.504839910647803, "grad_norm": 0.0034581590737498553, "kl": 0.2890625, "learning_rate": 4.281191145834879e-07, "loss": 0.00028854861739091575, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3694, "train_speed(iter/s)": 0.027512 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.75, "completions/mean_length": 91.79166984558105, "completions/min_length": 32.5, "epoch": 5.506329113924051, "grad_norm": 0.004033948808764, "kl": 0.31396484375, "learning_rate": 4.2788508478201603e-07, "loss": 0.0003143712820019573, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3695, "train_speed(iter/s)": 0.027511 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 251.5, "completions/mean_length": 106.14583587646484, "completions/min_length": 44.5, "epoch": 5.5078183172002975, "grad_norm": 0.003753587690265936, "kl": 0.25732421875, "learning_rate": 4.276510711139884e-07, "loss": 0.00025720405392348766, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3696, "train_speed(iter/s)": 0.027512 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 233.5, "completions/mean_length": 107.88541984558105, "completions/min_length": 45.5, "epoch": 5.509307520476545, "grad_norm": 0.003189253035208483, "kl": 0.26171875, "learning_rate": 4.274170736317581e-07, "loss": 0.00026123877614736557, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3697, "train_speed(iter/s)": 0.027512 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 254.25, "completions/mean_length": 98.20833587646484, "completions/min_length": 41.75, "epoch": 5.510796723752792, "grad_norm": 1.928273865958692, "kl": 0.27880859375, "learning_rate": 4.271830923876748e-07, "loss": -0.004049734212458134, "memory(GiB)": 112.53, "reward": 1.8437500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3698, "train_speed(iter/s)": 0.027512 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 256.25, "completions/mean_length": 110.72916793823242, "completions/min_length": 47.25, "epoch": 5.51228592702904, "grad_norm": 0.40439132336314393, "kl": 0.5654296875, "learning_rate": 4.269491274340847e-07, "loss": 0.0005649033701047301, "memory(GiB)": 112.53, "reward": 1.5000000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3699, "train_speed(iter/s)": 0.027508 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 253.5, "completions/mean_length": 99.20833778381348, "completions/min_length": 40.5, "epoch": 5.513775130305286, "grad_norm": 0.00347830552837941, "kl": 0.261474609375, "learning_rate": 4.2671517882332955e-07, "loss": 0.000261393201071769, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3700, "train_speed(iter/s)": 0.02751 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.5, "completions/mean_length": 104.65625, "completions/min_length": 35.25, "epoch": 5.515264333581534, "grad_norm": 1.5938005289827766, "kl": 0.271728515625, "learning_rate": 4.264812466077486e-07, "loss": 0.019283108413219452, "memory(GiB)": 112.53, "reward": 1.822916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3701, "train_speed(iter/s)": 0.027511 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.0, "completions/mean_length": 90.34375381469727, "completions/min_length": 40.0, "epoch": 5.516753536857781, "grad_norm": 0.004680651117253739, "kl": 0.30908203125, "learning_rate": 4.2624733083967625e-07, "loss": 0.00030931446235626936, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3702, "train_speed(iter/s)": 0.027512 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.5, "completions/mean_length": 101.95833587646484, "completions/min_length": 43.0, "epoch": 5.518242740134029, "grad_norm": 1.4615363457686503, "kl": 0.28759765625, "learning_rate": 4.260134315714441e-07, "loss": -0.0011322564678266644, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3703, "train_speed(iter/s)": 0.027513 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 260.75, "completions/mean_length": 116.20833778381348, "completions/min_length": 45.75, "epoch": 5.519731943410275, "grad_norm": 0.007171110072029799, "kl": 0.26025390625, "learning_rate": 4.257795488553798e-07, "loss": 0.00026054916088469326, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3704, "train_speed(iter/s)": 0.027512 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 236.75, "completions/mean_length": 103.90625190734863, "completions/min_length": 43.75, "epoch": 5.521221146686523, "grad_norm": 0.0032455079685184177, "kl": 0.27001953125, "learning_rate": 4.2554568274380706e-07, "loss": 0.0002693997521419078, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3705, "train_speed(iter/s)": 0.02751 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.5, "completions/mean_length": 102.93750190734863, "completions/min_length": 38.5, "epoch": 5.52271034996277, "grad_norm": 0.8802410603181189, "kl": 0.2841796875, "learning_rate": 4.253118332890464e-07, "loss": 0.02194175496697426, "memory(GiB)": 112.53, "reward": 1.9895833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9895833432674408, "rewards/CineAccuracyORM/std": 0.05103103443980217, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3706, "train_speed(iter/s)": 0.027508 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 276.25, "completions/mean_length": 110.36458396911621, "completions/min_length": 44.25, "epoch": 5.524199553239017, "grad_norm": 0.0038455978850784286, "kl": 0.261474609375, "learning_rate": 4.250780005434138e-07, "loss": 0.0002613865362945944, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3707, "train_speed(iter/s)": 0.027508 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 225.25, "completions/mean_length": 98.28125190734863, "completions/min_length": 47.25, "epoch": 5.525688756515264, "grad_norm": 1.337212278459705, "kl": 0.30029296875, "learning_rate": 4.248441845592223e-07, "loss": 0.009731663390994072, "memory(GiB)": 112.53, "reward": 1.5937500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500149011612, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3708, "train_speed(iter/s)": 0.027509 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.0, "completions/mean_length": 108.62500190734863, "completions/min_length": 43.25, "epoch": 5.527177959791511, "grad_norm": 1.078470518488538, "kl": 0.2802734375, "learning_rate": 4.246103853887809e-07, "loss": -0.029756873846054077, "memory(GiB)": 112.53, "reward": 1.6145834028720856, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6145833507180214, "rewards/CineAccuracyORM/std": 0.464870348572731, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3709, "train_speed(iter/s)": 0.027511 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 267.0, "completions/mean_length": 113.88541793823242, "completions/min_length": 39.75, "epoch": 5.528667163067759, "grad_norm": 0.0034419572025501995, "kl": 0.267822265625, "learning_rate": 4.243766030843946e-07, "loss": 0.00026783611974678934, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3710, "train_speed(iter/s)": 0.027513 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 234.5, "completions/mean_length": 110.92708969116211, "completions/min_length": 45.25, "epoch": 5.530156366344006, "grad_norm": 2.1288253403475395, "kl": 0.26806640625, "learning_rate": 4.2414283769836517e-07, "loss": 0.03538808226585388, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.7500000298023224, "rewards/CineAccuracyORM/std": 0.40787915512919426, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3711, "train_speed(iter/s)": 0.027515 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 257.25, "completions/mean_length": 106.79166793823242, "completions/min_length": 38.25, "epoch": 5.531645569620253, "grad_norm": 1.6588340775387196, "kl": 0.27880859375, "learning_rate": 4.2390908928298986e-07, "loss": -0.0050233579240739346, "memory(GiB)": 112.53, "reward": 1.6250000298023224, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.6250000074505806, "rewards/CineAccuracyORM/std": 0.36846019327640533, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3712, "train_speed(iter/s)": 0.027514 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 238.25, "completions/mean_length": 107.63541793823242, "completions/min_length": 36.5, "epoch": 5.5331347728965, "grad_norm": 1.014681054515337, "kl": 0.23974609375, "learning_rate": 4.236753578905627e-07, "loss": -0.016362646594643593, "memory(GiB)": 112.53, "reward": 1.6875000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000074505806, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3713, "train_speed(iter/s)": 0.02751 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 240.5, "completions/mean_length": 109.39583587646484, "completions/min_length": 39.5, "epoch": 5.534623976172748, "grad_norm": 0.009867331486612167, "kl": 0.26318359375, "learning_rate": 4.23441643573374e-07, "loss": 0.00026288643130101264, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3714, "train_speed(iter/s)": 0.027508 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 268.25, "completions/mean_length": 106.13541984558105, "completions/min_length": 36.5, "epoch": 5.536113179448995, "grad_norm": 1.72495775033948, "kl": 0.26513671875, "learning_rate": 4.232079463837095e-07, "loss": 0.0037027690559625626, "memory(GiB)": 112.53, "reward": 1.7187500298023224, "reward_std": 0.07400632463395596, "rewards/CineAccuracyORM/mean": 0.7187500149011612, "rewards/CineAccuracyORM/std": 0.33919306844472885, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3715, "train_speed(iter/s)": 0.027508 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 256.75, "completions/mean_length": 105.22916793823242, "completions/min_length": 38.5, "epoch": 5.537602382725242, "grad_norm": 0.0035494582747064513, "kl": 0.28173828125, "learning_rate": 4.22974266373852e-07, "loss": 0.00028164038667455316, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3716, "train_speed(iter/s)": 0.02751 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 241.0, "completions/mean_length": 103.69791984558105, "completions/min_length": 38.5, "epoch": 5.539091586001489, "grad_norm": 0.0034797406650752013, "kl": 0.272705078125, "learning_rate": 4.2274060359607975e-07, "loss": 0.00027271686121821404, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3717, "train_speed(iter/s)": 0.027512 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 294.75, "completions/mean_length": 129.2083396911621, "completions/min_length": 51.75, "epoch": 5.540580789277737, "grad_norm": 0.7107587449752789, "kl": 0.232666015625, "learning_rate": 4.225069581026676e-07, "loss": -0.0066312989220023155, "memory(GiB)": 112.53, "reward": 1.5208333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.5208333414047956, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3718, "train_speed(iter/s)": 0.02751 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 273.5, "completions/mean_length": 123.39583969116211, "completions/min_length": 35.0, "epoch": 5.542069992553984, "grad_norm": 0.03461769971042832, "kl": 0.25537109375, "learning_rate": 4.222733299458866e-07, "loss": 0.0002548657066654414, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3719, "train_speed(iter/s)": 0.027507 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 281.75, "completions/mean_length": 101.75000190734863, "completions/min_length": 42.0, "epoch": 5.54355919583023, "grad_norm": 1.0489129364082568, "kl": 0.2822265625, "learning_rate": 4.220397191780033e-07, "loss": -0.019515296444296837, "memory(GiB)": 112.53, "reward": 1.8958333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3720, "train_speed(iter/s)": 0.027509 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.5, "completions/mean_length": 103.34375190734863, "completions/min_length": 41.75, "epoch": 5.545048399106478, "grad_norm": 1.9183207992849736, "kl": 4.26318359375, "learning_rate": 4.21806125851281e-07, "loss": 0.006735526025295258, "memory(GiB)": 112.53, "reward": 1.5937500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500074505806, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3721, "train_speed(iter/s)": 0.027509 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 258.0, "completions/mean_length": 106.96875190734863, "completions/min_length": 42.0, "epoch": 5.546537602382725, "grad_norm": 0.8689468258806299, "kl": 0.2685546875, "learning_rate": 4.215725500179787e-07, "loss": 0.0017194425454363227, "memory(GiB)": 112.53, "reward": 1.697916716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.697916679084301, "rewards/CineAccuracyORM/std": 0.34448449313640594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3722, "train_speed(iter/s)": 0.027507 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 290.25, "completions/mean_length": 133.7187557220459, "completions/min_length": 46.75, "epoch": 5.5480268056589725, "grad_norm": 1.443868745897337, "kl": 0.239990234375, "learning_rate": 4.2133899173035176e-07, "loss": -0.009163138456642628, "memory(GiB)": 112.53, "reward": 1.8020833432674408, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.20484384894371033, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3723, "train_speed(iter/s)": 0.027505 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 312.5, "completions/mean_length": 124.28125381469727, "completions/min_length": 55.25, "epoch": 5.549516008935219, "grad_norm": 0.003977990994704512, "kl": 0.242919921875, "learning_rate": 4.2110545104065166e-07, "loss": 0.0002427904400974512, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3724, "train_speed(iter/s)": 0.027506 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 220.0, "completions/mean_length": 109.52083778381348, "completions/min_length": 43.25, "epoch": 5.551005212211467, "grad_norm": 0.6428282559999247, "kl": 0.267578125, "learning_rate": 4.2087192800112543e-07, "loss": -0.002682314021512866, "memory(GiB)": 112.53, "reward": 1.8125000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3725, "train_speed(iter/s)": 0.027504 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.0, "completions/mean_length": 106.47916984558105, "completions/min_length": 47.0, "epoch": 5.552494415487714, "grad_norm": 1.007127367983674, "kl": 0.27490234375, "learning_rate": 4.206384226640169e-07, "loss": 0.001420194050297141, "memory(GiB)": 112.53, "reward": 1.8854166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.20484384894371033, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3726, "train_speed(iter/s)": 0.027504 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 244.75, "completions/mean_length": 115.11458587646484, "completions/min_length": 42.5, "epoch": 5.5539836187639615, "grad_norm": 0.004267692487584967, "kl": 0.2587890625, "learning_rate": 4.204049350815652e-07, "loss": 0.00025879297754727304, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3727, "train_speed(iter/s)": 0.027502 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 260.0, "completions/mean_length": 108.46875190734863, "completions/min_length": 49.0, "epoch": 5.555472822040208, "grad_norm": 1.3398304791862952, "kl": 0.27294921875, "learning_rate": 4.201714653060058e-07, "loss": 0.003257672768086195, "memory(GiB)": 112.53, "reward": 1.729166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7291666865348816, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3728, "train_speed(iter/s)": 0.0275 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 255.25, "completions/mean_length": 108.50000381469727, "completions/min_length": 44.5, "epoch": 5.556962025316456, "grad_norm": 0.003690201837186203, "kl": 0.271484375, "learning_rate": 4.199380133895706e-07, "loss": 0.000271652068477124, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3729, "train_speed(iter/s)": 0.027497 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 276.5, "completions/mean_length": 106.58333587646484, "completions/min_length": 43.5, "epoch": 5.558451228592703, "grad_norm": 1.5148171209216579, "kl": 0.25244140625, "learning_rate": 4.197045793844868e-07, "loss": 0.0002526482567191124, "memory(GiB)": 112.53, "reward": 1.6562500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500149011612, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3730, "train_speed(iter/s)": 0.027496 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 229.25, "completions/mean_length": 114.84375381469727, "completions/min_length": 35.5, "epoch": 5.5599404318689505, "grad_norm": 0.982923612850683, "kl": 0.250244140625, "learning_rate": 4.1947116334297817e-07, "loss": 0.018049906939268112, "memory(GiB)": 112.53, "reward": 1.8645833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8645833432674408, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3731, "train_speed(iter/s)": 0.027496 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 229.5, "completions/mean_length": 110.36458778381348, "completions/min_length": 46.0, "epoch": 5.561429635145197, "grad_norm": 1.7666759063473136, "kl": 0.267822265625, "learning_rate": 4.1923776531726395e-07, "loss": -0.004617857746779919, "memory(GiB)": 112.53, "reward": 1.604166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.604166679084301, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3732, "train_speed(iter/s)": 0.027497 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 247.0, "completions/mean_length": 106.09375190734863, "completions/min_length": 28.75, "epoch": 5.562918838421444, "grad_norm": 0.0036432149565031203, "kl": 0.273681640625, "learning_rate": 4.190043853595596e-07, "loss": 0.0002737919567152858, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3733, "train_speed(iter/s)": 0.027496 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 243.0, "completions/mean_length": 104.12500190734863, "completions/min_length": 36.0, "epoch": 5.564408041697692, "grad_norm": 0.126565402552392, "kl": 0.31787109375, "learning_rate": 4.1877102352207695e-07, "loss": 0.0003183285007253289, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3734, "train_speed(iter/s)": 0.027495 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 298.0, "completions/mean_length": 123.69791984558105, "completions/min_length": 40.25, "epoch": 5.565897244973939, "grad_norm": 0.005002101891349266, "kl": 0.256103515625, "learning_rate": 4.1853767985702284e-07, "loss": 0.00025662945699878037, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3735, "train_speed(iter/s)": 0.027494 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 225.5, "completions/mean_length": 110.50000190734863, "completions/min_length": 44.75, "epoch": 5.567386448250186, "grad_norm": 0.003638016843426783, "kl": 0.28125, "learning_rate": 4.183043544166011e-07, "loss": 0.0002813395403791219, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3736, "train_speed(iter/s)": 0.027496 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 237.75, "completions/mean_length": 107.07291984558105, "completions/min_length": 41.5, "epoch": 5.568875651526433, "grad_norm": 0.0032497898440619274, "kl": 0.283203125, "learning_rate": 4.1807104725301046e-07, "loss": 0.0002835446211975068, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3737, "train_speed(iter/s)": 0.027491 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.0, "completions/mean_length": 102.52083587646484, "completions/min_length": 39.25, "epoch": 5.570364854802681, "grad_norm": 0.003698748444663835, "kl": 0.28173828125, "learning_rate": 4.1783775841844625e-07, "loss": 0.0002815296465996653, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3738, "train_speed(iter/s)": 0.027491 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 263.75, "completions/mean_length": 112.17708587646484, "completions/min_length": 40.5, "epoch": 5.5718540580789275, "grad_norm": 0.004061887565061908, "kl": 0.27880859375, "learning_rate": 4.176044879650998e-07, "loss": 0.00027869592304341495, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3739, "train_speed(iter/s)": 0.027491 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 262.0, "completions/mean_length": 114.98958587646484, "completions/min_length": 36.25, "epoch": 5.573343261355175, "grad_norm": 0.0033614659109649507, "kl": 0.25390625, "learning_rate": 4.1737123594515755e-07, "loss": 0.0002536989049986005, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3740, "train_speed(iter/s)": 0.027493 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 257.0, "completions/mean_length": 115.40625190734863, "completions/min_length": 43.5, "epoch": 5.574832464631422, "grad_norm": 0.008439476780471439, "kl": 0.267822265625, "learning_rate": 4.1713800241080256e-07, "loss": 0.00026812631404027343, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3741, "train_speed(iter/s)": 0.027491 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 272.25, "completions/mean_length": 109.58333587646484, "completions/min_length": 44.75, "epoch": 5.57632166790767, "grad_norm": 0.0033050750730465005, "kl": 0.27685546875, "learning_rate": 4.169047874142136e-07, "loss": 0.00027699608472175896, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3742, "train_speed(iter/s)": 0.027493 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 248.0, "completions/mean_length": 109.36458778381348, "completions/min_length": 40.0, "epoch": 5.5778108711839165, "grad_norm": 0.003916179062110599, "kl": 0.27880859375, "learning_rate": 4.1667159100756494e-07, "loss": 0.00027834626962430775, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3743, "train_speed(iter/s)": 0.027491 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 249.25, "completions/mean_length": 106.35417175292969, "completions/min_length": 37.5, "epoch": 5.579300074460164, "grad_norm": 1.2714058883049628, "kl": 0.990478515625, "learning_rate": 4.1643841324302725e-07, "loss": -0.022423384711146355, "memory(GiB)": 112.53, "reward": 1.8645833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8645833432674408, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3744, "train_speed(iter/s)": 0.027488 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.75, "completions/mean_length": 104.50000190734863, "completions/min_length": 41.0, "epoch": 5.580789277736411, "grad_norm": 2.1016233519346392, "kl": 0.28271484375, "learning_rate": 4.162052541727664e-07, "loss": 0.0013542331289499998, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3745, "train_speed(iter/s)": 0.027486 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 259.0, "completions/mean_length": 103.64583587646484, "completions/min_length": 47.25, "epoch": 5.582278481012658, "grad_norm": 0.003563250087820634, "kl": 0.275634765625, "learning_rate": 4.159721138489444e-07, "loss": 0.00027540908195078373, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3746, "train_speed(iter/s)": 0.027484 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 265.0, "completions/mean_length": 111.28125381469727, "completions/min_length": 43.0, "epoch": 5.5837676842889055, "grad_norm": 0.003786489929709757, "kl": 0.2763671875, "learning_rate": 4.1573899232371946e-07, "loss": 0.00027625111397355795, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3747, "train_speed(iter/s)": 0.027482 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.5, "completions/mean_length": 99.65625381469727, "completions/min_length": 45.5, "epoch": 5.585256887565152, "grad_norm": 1.2070876017088825, "kl": 0.267578125, "learning_rate": 4.1550588964924473e-07, "loss": 0.008223136886954308, "memory(GiB)": 112.53, "reward": 1.7187500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500074505806, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3748, "train_speed(iter/s)": 0.027483 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 248.75, "completions/mean_length": 109.36458587646484, "completions/min_length": 44.25, "epoch": 5.5867460908414, "grad_norm": 0.015703891610389115, "kl": 0.25537109375, "learning_rate": 4.1527280587767007e-07, "loss": 0.0002551618672441691, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3749, "train_speed(iter/s)": 0.027481 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 260.0, "completions/mean_length": 104.23958587646484, "completions/min_length": 44.5, "epoch": 5.588235294117647, "grad_norm": 1.1264846521062284, "kl": 0.28369140625, "learning_rate": 4.150397410611401e-07, "loss": -0.019562682136893272, "memory(GiB)": 112.53, "reward": 1.7812500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7812500149011612, "rewards/CineAccuracyORM/std": 0.34448449313640594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3750, "train_speed(iter/s)": 0.027483 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 229.0, "completions/mean_length": 110.39583778381348, "completions/min_length": 39.25, "epoch": 5.589724497393894, "grad_norm": 0.0041016621217861305, "kl": 0.28369140625, "learning_rate": 4.148066952517961e-07, "loss": 0.00028357759583741426, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3751, "train_speed(iter/s)": 0.027483 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 206.25, "completions/mean_length": 94.29166793823242, "completions/min_length": 45.25, "epoch": 5.591213700670141, "grad_norm": 0.003924113846520456, "kl": 0.30126953125, "learning_rate": 4.1457366850177475e-07, "loss": 0.0003010226646438241, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3752, "train_speed(iter/s)": 0.027483 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 208.0, "completions/mean_length": 107.76041793823242, "completions/min_length": 44.0, "epoch": 5.592702903946389, "grad_norm": 0.004110730601168777, "kl": 0.259521484375, "learning_rate": 4.143406608632083e-07, "loss": 0.0002598912687972188, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3753, "train_speed(iter/s)": 0.027482 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 238.0, "completions/mean_length": 104.15625190734863, "completions/min_length": 36.0, "epoch": 5.594192107222636, "grad_norm": 0.003195759653441745, "kl": 0.27490234375, "learning_rate": 4.1410767238822497e-07, "loss": 0.00027475887327454984, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3754, "train_speed(iter/s)": 0.027482 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.5, "completions/mean_length": 99.73958587646484, "completions/min_length": 47.0, "epoch": 5.595681310498883, "grad_norm": 0.0033036791595694704, "kl": 0.275390625, "learning_rate": 4.138747031289485e-07, "loss": 0.0002750334679149091, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3755, "train_speed(iter/s)": 0.027483 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 263.0, "completions/mean_length": 108.02083587646484, "completions/min_length": 40.75, "epoch": 5.59717051377513, "grad_norm": 0.6866131877913871, "kl": 0.25439453125, "learning_rate": 4.136417531374985e-07, "loss": 0.012726342305541039, "memory(GiB)": 112.53, "reward": 1.4270833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.4270833507180214, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3756, "train_speed(iter/s)": 0.027482 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.75, "completions/mean_length": 107.07291984558105, "completions/min_length": 50.25, "epoch": 5.598659717051378, "grad_norm": 0.0031890036111792757, "kl": 0.25732421875, "learning_rate": 4.1340882246599047e-07, "loss": 0.0002574680547695607, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3757, "train_speed(iter/s)": 0.027481 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 256.25, "completions/mean_length": 107.86458587646484, "completions/min_length": 43.25, "epoch": 5.600148920327625, "grad_norm": 0.5455076916642506, "kl": 0.27392578125, "learning_rate": 4.131759111665348e-07, "loss": -0.012660618871450424, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833507180214, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3758, "train_speed(iter/s)": 0.027479 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.0, "completions/mean_length": 97.37500190734863, "completions/min_length": 32.0, "epoch": 5.6016381236038715, "grad_norm": 1.5015324176175922, "kl": 0.28515625, "learning_rate": 4.1294301929123855e-07, "loss": 0.003869467880576849, "memory(GiB)": 112.53, "reward": 1.8541666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.12590253353118896, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3759, "train_speed(iter/s)": 0.027479 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.5, "completions/mean_length": 94.88541984558105, "completions/min_length": 39.0, "epoch": 5.603127326880119, "grad_norm": 1.7816175442675841, "kl": 0.3037109375, "learning_rate": 4.127101468922035e-07, "loss": -0.013289414346218109, "memory(GiB)": 112.53, "reward": 1.96875, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.96875, "rewards/CineAccuracyORM/std": 0.08445799350738525, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3760, "train_speed(iter/s)": 0.027481 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 259.0, "completions/mean_length": 104.89583778381348, "completions/min_length": 46.5, "epoch": 5.604616530156366, "grad_norm": 0.0032722586563580765, "kl": 0.26953125, "learning_rate": 4.1247729402152785e-07, "loss": 0.00027004798175767064, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3761, "train_speed(iter/s)": 0.027482 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 244.5, "completions/mean_length": 109.31250190734863, "completions/min_length": 36.75, "epoch": 5.606105733432614, "grad_norm": 0.8140399071027992, "kl": 0.260986328125, "learning_rate": 4.1224446073130524e-07, "loss": 0.018128585070371628, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 0.9895833432674408, "rewards/Format/std": 0.05103103443980217, "step": 3762, "train_speed(iter/s)": 0.027485 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 282.0, "completions/mean_length": 118.41666984558105, "completions/min_length": 30.25, "epoch": 5.6075949367088604, "grad_norm": 0.0035327123847907205, "kl": 0.2578125, "learning_rate": 4.1201164707362436e-07, "loss": 0.00025735574308782816, "memory(GiB)": 112.53, "reward": 1.5000000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3763, "train_speed(iter/s)": 0.027483 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 315.5, "completions/mean_length": 110.50000190734863, "completions/min_length": 45.25, "epoch": 5.609084139985108, "grad_norm": 2.1100950887599557, "kl": 0.253173828125, "learning_rate": 4.1177885310057036e-07, "loss": -0.005034657660871744, "memory(GiB)": 112.53, "reward": 1.8125000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3764, "train_speed(iter/s)": 0.027483 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.5, "completions/mean_length": 112.13542175292969, "completions/min_length": 39.25, "epoch": 5.610573343261355, "grad_norm": 0.0035962672116241637, "kl": 0.261962890625, "learning_rate": 4.1154607886422323e-07, "loss": 0.00026192772202193737, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3765, "train_speed(iter/s)": 0.027483 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 223.25, "completions/mean_length": 101.04166984558105, "completions/min_length": 43.0, "epoch": 5.612062546537603, "grad_norm": 0.6138968602781735, "kl": 0.2666015625, "learning_rate": 4.1131332441665893e-07, "loss": 0.008080998435616493, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3766, "train_speed(iter/s)": 0.027482 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 229.5, "completions/mean_length": 98.22917175292969, "completions/min_length": 40.5, "epoch": 5.613551749813849, "grad_norm": 0.0037249861380804256, "kl": 0.29345703125, "learning_rate": 4.110805898099492e-07, "loss": 0.0002938238612841815, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3767, "train_speed(iter/s)": 0.027482 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.75, "completions/mean_length": 94.50000190734863, "completions/min_length": 41.75, "epoch": 5.615040953090097, "grad_norm": 0.003621732865577569, "kl": 0.28076171875, "learning_rate": 4.108478750961608e-07, "loss": 0.00028056834707967937, "memory(GiB)": 112.53, "reward": 1.416666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.416666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3768, "train_speed(iter/s)": 0.027484 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 261.5, "completions/mean_length": 114.36458396911621, "completions/min_length": 46.25, "epoch": 5.616530156366344, "grad_norm": 0.0032142491783633343, "kl": 0.25830078125, "learning_rate": 4.106151803273567e-07, "loss": 0.0002585059264674783, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3769, "train_speed(iter/s)": 0.027482 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 252.0, "completions/mean_length": 110.71875190734863, "completions/min_length": 45.25, "epoch": 5.618019359642592, "grad_norm": 0.004523464353272897, "kl": 0.26318359375, "learning_rate": 4.103825055555946e-07, "loss": 0.0002629268856253475, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3770, "train_speed(iter/s)": 0.027482 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 191.5, "completions/mean_length": 89.81250381469727, "completions/min_length": 31.25, "epoch": 5.619508562918838, "grad_norm": 0.05133478678463551, "kl": 0.32763671875, "learning_rate": 4.1014985083292835e-07, "loss": 0.00032705586636438966, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3771, "train_speed(iter/s)": 0.027481 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.5, "completions/mean_length": 101.71875190734863, "completions/min_length": 46.75, "epoch": 5.620997766195085, "grad_norm": 0.003661019009449904, "kl": 0.26513671875, "learning_rate": 4.099172162114072e-07, "loss": 0.000265300739556551, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3772, "train_speed(iter/s)": 0.027484 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 290.25, "completions/mean_length": 100.78125190734863, "completions/min_length": 37.75, "epoch": 5.622486969471333, "grad_norm": 0.0035424880457079616, "kl": 0.29931640625, "learning_rate": 4.096846017430757e-07, "loss": 0.0002991144428960979, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3773, "train_speed(iter/s)": 0.02748 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.5, "completions/mean_length": 98.68750381469727, "completions/min_length": 35.75, "epoch": 5.62397617274758, "grad_norm": 2.403594734312623, "kl": 0.30078125, "learning_rate": 4.094520074799744e-07, "loss": -0.0054648942314088345, "memory(GiB)": 112.53, "reward": 1.8437500298023224, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.2819983549416065, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3774, "train_speed(iter/s)": 0.027482 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 260.25, "completions/mean_length": 100.01041984558105, "completions/min_length": 45.5, "epoch": 5.625465376023827, "grad_norm": 1.0054698251370373, "kl": 0.2734375, "learning_rate": 4.092194334741383e-07, "loss": 0.004351351410150528, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3775, "train_speed(iter/s)": 0.027484 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 225.75, "completions/mean_length": 92.38541793823242, "completions/min_length": 35.75, "epoch": 5.626954579300074, "grad_norm": 0.0032158199341101, "kl": 0.28271484375, "learning_rate": 4.0898687977759887e-07, "loss": 0.0002831960446201265, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3776, "train_speed(iter/s)": 0.027486 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 206.25, "completions/mean_length": 96.23958587646484, "completions/min_length": 40.0, "epoch": 5.628443782576322, "grad_norm": 2.0085612266837147, "kl": 0.3125, "learning_rate": 4.0875434644238296e-07, "loss": 0.004766806028783321, "memory(GiB)": 112.53, "reward": 1.6250000298023224, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.6250000074505806, "rewards/CineAccuracyORM/std": 0.36846019327640533, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3777, "train_speed(iter/s)": 0.027486 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.25, "completions/mean_length": 104.22916793823242, "completions/min_length": 46.5, "epoch": 5.629932985852569, "grad_norm": 0.6348644543375663, "kl": 0.27587890625, "learning_rate": 4.0852183352051206e-07, "loss": 0.0179036445915699, "memory(GiB)": 112.53, "reward": 1.6875000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000074505806, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3778, "train_speed(iter/s)": 0.027481 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 220.75, "completions/mean_length": 88.14583396911621, "completions/min_length": 33.25, "epoch": 5.631422189128816, "grad_norm": 0.00406778470335765, "kl": 0.32421875, "learning_rate": 4.0828934106400393e-07, "loss": 0.00032374082366004586, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3779, "train_speed(iter/s)": 0.027481 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 204.25, "completions/mean_length": 96.54166984558105, "completions/min_length": 37.0, "epoch": 5.632911392405063, "grad_norm": 0.010009692710785332, "kl": 0.28857421875, "learning_rate": 4.080568691248714e-07, "loss": 0.00028820065199397504, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3780, "train_speed(iter/s)": 0.027482 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.25, "completions/mean_length": 100.51041984558105, "completions/min_length": 41.75, "epoch": 5.634400595681311, "grad_norm": 1.3492263162997626, "kl": 0.27001953125, "learning_rate": 4.078244177551226e-07, "loss": 0.006316705606877804, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.7500000102445483, "rewards/CineAccuracyORM/std": 0.10206206887960434, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3781, "train_speed(iter/s)": 0.027482 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.75, "completions/mean_length": 97.27083587646484, "completions/min_length": 34.75, "epoch": 5.635889798957558, "grad_norm": 0.0034162516064805254, "kl": 0.30859375, "learning_rate": 4.075919870067616e-07, "loss": 0.00030907278414815664, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3782, "train_speed(iter/s)": 0.027481 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 248.5, "completions/mean_length": 92.60416793823242, "completions/min_length": 35.5, "epoch": 5.637379002233805, "grad_norm": 0.0032084770076727933, "kl": 0.29345703125, "learning_rate": 4.073595769317871e-07, "loss": 0.00029315537540242076, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3783, "train_speed(iter/s)": 0.027481 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 269.75, "completions/mean_length": 94.82292175292969, "completions/min_length": 33.0, "epoch": 5.638868205510052, "grad_norm": 0.0034689096680085123, "kl": 0.29736328125, "learning_rate": 4.071271875821937e-07, "loss": 0.00029733229894191027, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3784, "train_speed(iter/s)": 0.02748 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 214.5, "completions/mean_length": 88.33333587646484, "completions/min_length": 37.5, "epoch": 5.640357408786299, "grad_norm": 0.003687945412962379, "kl": 0.3037109375, "learning_rate": 4.0689481900997103e-07, "loss": 0.00030349649023264647, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3785, "train_speed(iter/s)": 0.02748 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 214.5, "completions/mean_length": 96.88541984558105, "completions/min_length": 34.75, "epoch": 5.641846612062547, "grad_norm": 0.003977208018823367, "kl": 0.2998046875, "learning_rate": 4.0666247126710446e-07, "loss": 0.0002992182271555066, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3786, "train_speed(iter/s)": 0.02748 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.0, "completions/mean_length": 86.10416793823242, "completions/min_length": 38.5, "epoch": 5.643335815338793, "grad_norm": 1.182671891984316, "kl": 0.284423828125, "learning_rate": 4.0643014440557463e-07, "loss": 0.0033876574598252773, "memory(GiB)": 112.53, "reward": 1.6458334028720856, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6458333507180214, "rewards/CineAccuracyORM/std": 0.43174003064632416, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3787, "train_speed(iter/s)": 0.027481 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 248.25, "completions/mean_length": 100.17708587646484, "completions/min_length": 42.0, "epoch": 5.644825018615041, "grad_norm": 0.0030081701200088498, "kl": 0.27392578125, "learning_rate": 4.061978384773569e-07, "loss": 0.0002742138458415866, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3788, "train_speed(iter/s)": 0.02748 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 248.25, "completions/mean_length": 82.61458587646484, "completions/min_length": 34.0, "epoch": 5.646314221891288, "grad_norm": 1.230376016024726, "kl": 0.3193359375, "learning_rate": 4.0596555353442304e-07, "loss": -0.0021175078582018614, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.833333358168602, "rewards/CineAccuracyORM/std": 0.2874932996928692, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3789, "train_speed(iter/s)": 0.027479 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 264.25, "completions/mean_length": 93.64583587646484, "completions/min_length": 34.5, "epoch": 5.6478034251675355, "grad_norm": 0.003061905463509503, "kl": 0.30810546875, "learning_rate": 4.0573328962873883e-07, "loss": 0.00030743685783818364, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3790, "train_speed(iter/s)": 0.027481 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 275.0, "completions/mean_length": 92.61458587646484, "completions/min_length": 34.75, "epoch": 5.649292628443782, "grad_norm": 0.0031093977197924746, "kl": 0.31591796875, "learning_rate": 4.055010468122663e-07, "loss": 0.0003153950092382729, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3791, "train_speed(iter/s)": 0.027481 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.75, "completions/mean_length": 91.57292175292969, "completions/min_length": 32.75, "epoch": 5.65078183172003, "grad_norm": 1.5858062227591179, "kl": 0.3076171875, "learning_rate": 4.0526882513696263e-07, "loss": -0.0056968252174556255, "memory(GiB)": 112.53, "reward": 1.4687500298023224, "reward_std": 0.07400631904602051, "rewards/CineAccuracyORM/mean": 0.4687500074505806, "rewards/CineAccuracyORM/std": 0.34448449313640594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3792, "train_speed(iter/s)": 0.027478 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.0, "completions/mean_length": 79.06250381469727, "completions/min_length": 41.5, "epoch": 5.652271034996277, "grad_norm": 2.4026611285843322, "kl": 0.35546875, "learning_rate": 4.050366246547799e-07, "loss": 0.008909708820283413, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.3307463824748993, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3793, "train_speed(iter/s)": 0.02748 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 276.25, "completions/mean_length": 103.14583587646484, "completions/min_length": 38.0, "epoch": 5.6537602382725245, "grad_norm": 1.713526236542005, "kl": 0.28369140625, "learning_rate": 4.0480444541766575e-07, "loss": -0.012664607726037502, "memory(GiB)": 112.53, "reward": 1.5937500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500223517418, "rewards/CineAccuracyORM/std": 0.47723397612571716, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3794, "train_speed(iter/s)": 0.027477 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.0, "completions/mean_length": 88.70833587646484, "completions/min_length": 41.5, "epoch": 5.655249441548771, "grad_norm": 1.7348920496081572, "kl": 0.2841796875, "learning_rate": 4.0457228747756317e-07, "loss": -0.0012208567932248116, "memory(GiB)": 112.53, "reward": 1.6979167461395264, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6979166865348816, "rewards/CineAccuracyORM/std": 0.464870348572731, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3795, "train_speed(iter/s)": 0.027478 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 195.0, "completions/mean_length": 82.22916984558105, "completions/min_length": 35.25, "epoch": 5.656738644825019, "grad_norm": 0.003485857117182597, "kl": 0.31982421875, "learning_rate": 4.043401508864098e-07, "loss": 0.00032023616950027645, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3796, "train_speed(iter/s)": 0.027478 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.5, "completions/mean_length": 75.63541793823242, "completions/min_length": 32.0, "epoch": 5.658227848101266, "grad_norm": 0.0033155001068752975, "kl": 0.3330078125, "learning_rate": 4.0410803569613927e-07, "loss": 0.00033319363137707114, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3797, "train_speed(iter/s)": 0.027479 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 232.25, "completions/mean_length": 97.20833587646484, "completions/min_length": 37.75, "epoch": 5.659717051377513, "grad_norm": 0.004034061662509603, "kl": 0.27880859375, "learning_rate": 4.0387594195867976e-07, "loss": 0.0002792379818856716, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3798, "train_speed(iter/s)": 0.027477 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 196.5, "completions/mean_length": 81.91666984558105, "completions/min_length": 35.25, "epoch": 5.66120625465376, "grad_norm": 1.4591465541045783, "kl": 0.32470703125, "learning_rate": 4.03643869725955e-07, "loss": 0.0015163684729486704, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3799, "train_speed(iter/s)": 0.027474 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.75, "completions/mean_length": 79.86458396911621, "completions/min_length": 34.25, "epoch": 5.662695457930007, "grad_norm": 0.003240541813223302, "kl": 0.30615234375, "learning_rate": 4.0341181904988433e-07, "loss": 0.0003060287272091955, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3800, "train_speed(iter/s)": 0.027474 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 191.5, "completions/mean_length": 92.28125381469727, "completions/min_length": 41.5, "epoch": 5.664184661206255, "grad_norm": 0.010519930987613897, "kl": 0.3056640625, "learning_rate": 4.03179789982381e-07, "loss": 0.00030561030143871903, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3801, "train_speed(iter/s)": 0.02747 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.0, "completions/mean_length": 90.56250381469727, "completions/min_length": 34.25, "epoch": 5.6656738644825015, "grad_norm": 0.0031005033897583065, "kl": 0.302734375, "learning_rate": 4.0294778257535496e-07, "loss": 0.0003029801300726831, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3802, "train_speed(iter/s)": 0.027469 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.5, "completions/mean_length": 88.78125190734863, "completions/min_length": 33.75, "epoch": 5.667163067758749, "grad_norm": 1.0841902199592401, "kl": 0.3095703125, "learning_rate": 4.0271579688070996e-07, "loss": 0.010749047622084618, "memory(GiB)": 112.53, "reward": 1.7604166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3803, "train_speed(iter/s)": 0.02747 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 275.5, "completions/mean_length": 100.01041984558105, "completions/min_length": 37.5, "epoch": 5.668652271034996, "grad_norm": 0.0031631746334074176, "kl": 0.2724609375, "learning_rate": 4.0248383295034573e-07, "loss": 0.0002722761710174382, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3804, "train_speed(iter/s)": 0.02747 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.75, "completions/mean_length": 89.46875381469727, "completions/min_length": 36.0, "epoch": 5.670141474311244, "grad_norm": 0.0028953994726399666, "kl": 0.272705078125, "learning_rate": 4.0225189083615706e-07, "loss": 0.00027284747920930386, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3805, "train_speed(iter/s)": 0.02747 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 194.25, "completions/mean_length": 86.32291984558105, "completions/min_length": 35.0, "epoch": 5.6716306775874905, "grad_norm": 1.3081875612336002, "kl": 0.28955078125, "learning_rate": 4.020199705900335e-07, "loss": -0.0147388381883502, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.31460215896368027, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3806, "train_speed(iter/s)": 0.027469 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.25, "completions/mean_length": 88.92708587646484, "completions/min_length": 33.25, "epoch": 5.673119880863738, "grad_norm": 1.2827174117134015, "kl": 0.31298828125, "learning_rate": 4.017880722638602e-07, "loss": 0.002303590066730976, "memory(GiB)": 112.53, "reward": 1.729166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7291666865348816, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3807, "train_speed(iter/s)": 0.027471 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.5, "completions/mean_length": 88.98958587646484, "completions/min_length": 32.5, "epoch": 5.674609084139985, "grad_norm": 0.0030905891721065095, "kl": 0.29638671875, "learning_rate": 4.0155619590951673e-07, "loss": 0.0002959062112495303, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3808, "train_speed(iter/s)": 0.02747 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.75, "completions/mean_length": 82.97916793823242, "completions/min_length": 39.75, "epoch": 5.676098287416233, "grad_norm": 1.0316922854169355, "kl": 0.296875, "learning_rate": 4.013243415788782e-07, "loss": -0.0007238794933073223, "memory(GiB)": 112.53, "reward": 1.8854166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.20484384894371033, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3809, "train_speed(iter/s)": 0.027473 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.75, "completions/mean_length": 82.56250381469727, "completions/min_length": 33.0, "epoch": 5.6775874906924795, "grad_norm": 0.0032383160605814493, "kl": 0.3125, "learning_rate": 4.0109250932381503e-07, "loss": 0.000312779942760244, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3810, "train_speed(iter/s)": 0.027471 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 186.75, "completions/mean_length": 88.68750190734863, "completions/min_length": 35.25, "epoch": 5.679076693968726, "grad_norm": 0.0030071730555103746, "kl": 0.27880859375, "learning_rate": 4.0086069919619215e-07, "loss": 0.00027922214940190315, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3811, "train_speed(iter/s)": 0.027469 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 186.5, "completions/mean_length": 91.08333587646484, "completions/min_length": 38.25, "epoch": 5.680565897244974, "grad_norm": 1.229572704865113, "kl": 0.281494140625, "learning_rate": 4.0062891124787e-07, "loss": -0.004470080137252808, "memory(GiB)": 112.53, "reward": 1.760416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166865348816, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3812, "train_speed(iter/s)": 0.02747 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.25, "completions/mean_length": 89.57291793823242, "completions/min_length": 42.5, "epoch": 5.682055100521221, "grad_norm": 0.025463689586746913, "kl": 0.302978515625, "learning_rate": 4.0039714553070344e-07, "loss": 0.00030257602338679135, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3813, "train_speed(iter/s)": 0.027471 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 231.75, "completions/mean_length": 88.28125190734863, "completions/min_length": 40.5, "epoch": 5.6835443037974684, "grad_norm": 0.0034149748925239864, "kl": 0.31689453125, "learning_rate": 4.0016540209654303e-07, "loss": 0.00031754557858221233, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3814, "train_speed(iter/s)": 0.027469 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.5, "completions/mean_length": 93.84375381469727, "completions/min_length": 40.25, "epoch": 5.685033507073715, "grad_norm": 0.003018509309773397, "kl": 0.2783203125, "learning_rate": 3.9993368099723425e-07, "loss": 0.0002775842440314591, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3815, "train_speed(iter/s)": 0.027467 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 246.0, "completions/mean_length": 85.65625190734863, "completions/min_length": 42.0, "epoch": 5.686522710349963, "grad_norm": 0.003185629876033196, "kl": 0.298828125, "learning_rate": 3.99701982284617e-07, "loss": 0.0002986495092045516, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3816, "train_speed(iter/s)": 0.027466 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.0, "completions/mean_length": 85.54166793823242, "completions/min_length": 36.25, "epoch": 5.68801191362621, "grad_norm": 0.0032699839718068283, "kl": 0.3056640625, "learning_rate": 3.994703060105269e-07, "loss": 0.0003056571295019239, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3817, "train_speed(iter/s)": 0.027467 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.25, "completions/mean_length": 93.69791984558105, "completions/min_length": 34.5, "epoch": 5.689501116902457, "grad_norm": 0.0029656301390884143, "kl": 0.28857421875, "learning_rate": 3.9923865222679397e-07, "loss": 0.0002885334542952478, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3818, "train_speed(iter/s)": 0.027466 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.25, "completions/mean_length": 88.84375190734863, "completions/min_length": 36.25, "epoch": 5.690990320178704, "grad_norm": 0.0030545395658977117, "kl": 0.2890625, "learning_rate": 3.9900702098524364e-07, "loss": 0.00028900272445753217, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3819, "train_speed(iter/s)": 0.027466 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 241.5, "completions/mean_length": 98.16666793823242, "completions/min_length": 40.5, "epoch": 5.692479523454952, "grad_norm": 0.9019136943145603, "kl": 0.291015625, "learning_rate": 3.9877541233769627e-07, "loss": 0.002770377090200782, "memory(GiB)": 112.53, "reward": 1.8645833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8645833432674408, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3820, "train_speed(iter/s)": 0.027465 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 243.75, "completions/mean_length": 94.54166793823242, "completions/min_length": 35.75, "epoch": 5.693968726731199, "grad_norm": 0.0031193219613176526, "kl": 0.3046875, "learning_rate": 3.985438263359666e-07, "loss": 0.00030433936626650393, "memory(GiB)": 112.53, "reward": 1.5000000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000074505806, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3821, "train_speed(iter/s)": 0.027465 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.75, "completions/mean_length": 97.88541793823242, "completions/min_length": 33.75, "epoch": 5.695457930007446, "grad_norm": 0.003069946453208659, "kl": 0.2744140625, "learning_rate": 3.9831226303186504e-07, "loss": 0.00027429620968177915, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3822, "train_speed(iter/s)": 0.027467 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.75, "completions/mean_length": 94.37500381469727, "completions/min_length": 37.25, "epoch": 5.696947133283693, "grad_norm": 0.6956660727471788, "kl": 0.28125, "learning_rate": 3.9808072247719645e-07, "loss": 0.002571537857875228, "memory(GiB)": 112.53, "reward": 1.8958333432674408, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.12590253353118896, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3823, "train_speed(iter/s)": 0.027466 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 409.5, "completions/mean_length": 93.08333587646484, "completions/min_length": 39.25, "epoch": 5.69843633655994, "grad_norm": 0.31088076328374364, "kl": 0.29345703125, "learning_rate": 3.978492047237607e-07, "loss": 0.0672878697514534, "memory(GiB)": 112.53, "reward": 1.479166716337204, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.4895833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 0.9895833432674408, "rewards/Format/std": 0.05103103443980217, "step": 3824, "train_speed(iter/s)": 0.027465 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.75, "completions/mean_length": 88.34375190734863, "completions/min_length": 40.5, "epoch": 5.699925539836188, "grad_norm": 1.6880654104992951, "kl": 0.2880859375, "learning_rate": 3.9761770982335296e-07, "loss": -0.02572968229651451, "memory(GiB)": 112.53, "reward": 1.8958333432674408, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.2005344033241272, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3825, "train_speed(iter/s)": 0.027462 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 201.25, "completions/mean_length": 93.55208587646484, "completions/min_length": 36.75, "epoch": 5.701414743112435, "grad_norm": 1.8158849463585545, "kl": 0.29833984375, "learning_rate": 3.9738623782776247e-07, "loss": -0.015405352227389812, "memory(GiB)": 112.53, "reward": 1.8020833432674408, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.20484384894371033, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3826, "train_speed(iter/s)": 0.027461 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.75, "completions/mean_length": 94.76041984558105, "completions/min_length": 42.75, "epoch": 5.702903946388682, "grad_norm": 0.0031674074680294547, "kl": 0.30126953125, "learning_rate": 3.9715478878877416e-07, "loss": 0.0003006648912560195, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3827, "train_speed(iter/s)": 0.027459 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.75, "completions/mean_length": 86.36458587646484, "completions/min_length": 38.25, "epoch": 5.704393149664929, "grad_norm": 0.7102132472404632, "kl": 0.30859375, "learning_rate": 3.969233627581672e-07, "loss": -0.009639754891395569, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3828, "train_speed(iter/s)": 0.027462 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 279.5, "completions/mean_length": 92.77083587646484, "completions/min_length": 40.25, "epoch": 5.705882352941177, "grad_norm": 0.01708709580709895, "kl": 0.29736328125, "learning_rate": 3.966919597877159e-07, "loss": 0.0002970509813167155, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3829, "train_speed(iter/s)": 0.027458 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.0, "completions/mean_length": 86.98958778381348, "completions/min_length": 36.5, "epoch": 5.707371556217423, "grad_norm": 0.9101013363879736, "kl": 0.30029296875, "learning_rate": 3.9646057992918966e-07, "loss": -0.008833338506519794, "memory(GiB)": 112.53, "reward": 1.6145833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6145833432674408, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3830, "train_speed(iter/s)": 0.027457 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 201.0, "completions/mean_length": 90.08333587646484, "completions/min_length": 41.0, "epoch": 5.708860759493671, "grad_norm": 0.002947957481220793, "kl": 0.287353515625, "learning_rate": 3.9622922323435213e-07, "loss": 0.0002873280318453908, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3831, "train_speed(iter/s)": 0.027455 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 260.5, "completions/mean_length": 95.33333396911621, "completions/min_length": 40.25, "epoch": 5.710349962769918, "grad_norm": 1.0470408834502714, "kl": 0.2763671875, "learning_rate": 3.9599788975496255e-07, "loss": 0.01582391932606697, "memory(GiB)": 112.53, "reward": 1.6562500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500149011612, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3832, "train_speed(iter/s)": 0.027455 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 235.25, "completions/mean_length": 94.66666793823242, "completions/min_length": 33.5, "epoch": 5.711839166046166, "grad_norm": 0.0039033040910261144, "kl": 0.2841796875, "learning_rate": 3.95766579542774e-07, "loss": 0.00028384881443344057, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3833, "train_speed(iter/s)": 0.027456 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.5, "completions/mean_length": 82.95833587646484, "completions/min_length": 37.75, "epoch": 5.713328369322412, "grad_norm": 0.8390574152097977, "kl": 0.3017578125, "learning_rate": 3.95535292649535e-07, "loss": 0.010713253170251846, "memory(GiB)": 112.53, "reward": 1.4895833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.4895833358168602, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3834, "train_speed(iter/s)": 0.027458 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.0, "completions/mean_length": 85.71875190734863, "completions/min_length": 38.0, "epoch": 5.71481757259866, "grad_norm": 0.0031597596270724155, "kl": 0.2998046875, "learning_rate": 3.95304029126989e-07, "loss": 0.0002996252151206136, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3835, "train_speed(iter/s)": 0.027459 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 268.25, "completions/mean_length": 96.73958778381348, "completions/min_length": 34.0, "epoch": 5.716306775874907, "grad_norm": 0.0027270365411907445, "kl": 0.253173828125, "learning_rate": 3.950727890268735e-07, "loss": 0.00025331496726721525, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3836, "train_speed(iter/s)": 0.027457 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.5, "completions/mean_length": 88.54166984558105, "completions/min_length": 43.75, "epoch": 5.717795979151154, "grad_norm": 0.0031042299737855214, "kl": 0.31201171875, "learning_rate": 3.9484157240092176e-07, "loss": 0.0003125234798062593, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3837, "train_speed(iter/s)": 0.027456 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 196.5, "completions/mean_length": 84.72916793823242, "completions/min_length": 38.0, "epoch": 5.719285182427401, "grad_norm": 0.0034104484037565112, "kl": 0.310546875, "learning_rate": 3.946103793008606e-07, "loss": 0.00031091569690033793, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3838, "train_speed(iter/s)": 0.027457 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.5, "completions/mean_length": 89.625, "completions/min_length": 42.0, "epoch": 5.720774385703649, "grad_norm": 1.4978737732341532, "kl": 0.3056640625, "learning_rate": 3.943792097784126e-07, "loss": -0.004664897453039885, "memory(GiB)": 112.53, "reward": 1.7916666865348816, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.24807433784008026, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3839, "train_speed(iter/s)": 0.027454 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 275.25, "completions/mean_length": 94.19791984558105, "completions/min_length": 34.0, "epoch": 5.722263588979896, "grad_norm": 1.12560388134511, "kl": 0.310546875, "learning_rate": 3.941480638852948e-07, "loss": 0.006782475393265486, "memory(GiB)": 112.53, "reward": 1.7395834028720856, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.739583358168602, "rewards/CineAccuracyORM/std": 0.4121886007487774, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3840, "train_speed(iter/s)": 0.027451 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.75, "completions/mean_length": 98.15625190734863, "completions/min_length": 33.25, "epoch": 5.723752792256143, "grad_norm": 0.5581384274640582, "kl": 0.31005859375, "learning_rate": 3.939169416732183e-07, "loss": 0.006455061491578817, "memory(GiB)": 112.53, "reward": 1.5937500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500149011612, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3841, "train_speed(iter/s)": 0.027451 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.25, "completions/mean_length": 84.16666984558105, "completions/min_length": 36.75, "epoch": 5.72524199553239, "grad_norm": 0.0032675975197352393, "kl": 0.28955078125, "learning_rate": 3.9368584319388985e-07, "loss": 0.00028973055304959416, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3842, "train_speed(iter/s)": 0.027449 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.75, "completions/mean_length": 83.94791793823242, "completions/min_length": 38.0, "epoch": 5.726731198808637, "grad_norm": 0.0034702623393838864, "kl": 0.3056640625, "learning_rate": 3.9345476849901025e-07, "loss": 0.00030573573894798756, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3843, "train_speed(iter/s)": 0.027452 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.5, "completions/mean_length": 96.19791793823242, "completions/min_length": 40.0, "epoch": 5.728220402084885, "grad_norm": 0.003080691433030327, "kl": 0.290771484375, "learning_rate": 3.9322371764027534e-07, "loss": 0.0002902964479289949, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3844, "train_speed(iter/s)": 0.027452 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.75, "completions/mean_length": 87.83333778381348, "completions/min_length": 39.5, "epoch": 5.729709605361132, "grad_norm": 0.0032358751702503525, "kl": 0.289306640625, "learning_rate": 3.929926906693756e-07, "loss": 0.00028936791932210326, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3845, "train_speed(iter/s)": 0.027451 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.25, "completions/mean_length": 90.66666984558105, "completions/min_length": 40.5, "epoch": 5.731198808637379, "grad_norm": 0.010993878739886546, "kl": 0.31884765625, "learning_rate": 3.927616876379957e-07, "loss": 0.0003191455907654017, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3846, "train_speed(iter/s)": 0.027453 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 204.75, "completions/mean_length": 91.89583587646484, "completions/min_length": 37.5, "epoch": 5.732688011913626, "grad_norm": 0.8712305108398981, "kl": 0.2841796875, "learning_rate": 3.9253070859781543e-07, "loss": -0.009133687242865562, "memory(GiB)": 112.53, "reward": 1.8750000298023224, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.8750000149011612, "rewards/CineAccuracyORM/std": 0.2155592292547226, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3847, "train_speed(iter/s)": 0.027454 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.75, "completions/mean_length": 92.17708778381348, "completions/min_length": 45.25, "epoch": 5.734177215189874, "grad_norm": 0.0029911414586130548, "kl": 0.2890625, "learning_rate": 3.9229975360050934e-07, "loss": 0.0002890433243010193, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3848, "train_speed(iter/s)": 0.027454 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 206.0, "completions/mean_length": 84.97916793823242, "completions/min_length": 31.5, "epoch": 5.735666418466121, "grad_norm": 1.8084998596925357, "kl": 0.33203125, "learning_rate": 3.92068822697746e-07, "loss": -0.021177059039473534, "memory(GiB)": 112.53, "reward": 1.9687500298023224, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.9687500149011612, "rewards/CineAccuracyORM/std": 0.12161349877715111, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3849, "train_speed(iter/s)": 0.027454 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.5, "completions/mean_length": 83.92708587646484, "completions/min_length": 34.5, "epoch": 5.737155621742367, "grad_norm": 0.0031007080180657657, "kl": 0.31201171875, "learning_rate": 3.918379159411893e-07, "loss": 0.0003116300795227289, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3850, "train_speed(iter/s)": 0.027456 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 223.25, "completions/mean_length": 87.83333587646484, "completions/min_length": 42.0, "epoch": 5.738644825018615, "grad_norm": 1.0118889397901316, "kl": 0.3193359375, "learning_rate": 3.9160703338249687e-07, "loss": 0.0013099946081638336, "memory(GiB)": 112.53, "reward": 1.6770834028720856, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.677083358168602, "rewards/CineAccuracyORM/std": 0.47723397612571716, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3851, "train_speed(iter/s)": 0.027456 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 238.25, "completions/mean_length": 86.46875381469727, "completions/min_length": 31.75, "epoch": 5.740134028294863, "grad_norm": 0.0028332068352195598, "kl": 0.298828125, "learning_rate": 3.913761750733218e-07, "loss": 0.0002983149606734514, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3852, "train_speed(iter/s)": 0.027455 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 264.0, "completions/mean_length": 103.77083587646484, "completions/min_length": 35.25, "epoch": 5.7416232315711095, "grad_norm": 1.2117856458355953, "kl": 0.24853515625, "learning_rate": 3.911453410653115e-07, "loss": 0.0006060014711692929, "memory(GiB)": 112.53, "reward": 1.6875000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000149011612, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3853, "train_speed(iter/s)": 0.027453 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.5, "completions/mean_length": 96.53125190734863, "completions/min_length": 38.25, "epoch": 5.743112434847356, "grad_norm": 0.0028844962217260887, "kl": 0.279541015625, "learning_rate": 3.9091453141010733e-07, "loss": 0.0002794033498503268, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3854, "train_speed(iter/s)": 0.027454 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 234.5, "completions/mean_length": 97.67708587646484, "completions/min_length": 39.0, "epoch": 5.744601638123604, "grad_norm": 0.003038199058452638, "kl": 0.28515625, "learning_rate": 3.9068374615934605e-07, "loss": 0.0002851347962860018, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3855, "train_speed(iter/s)": 0.027454 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.25, "completions/mean_length": 80.77083587646484, "completions/min_length": 33.75, "epoch": 5.746090841399851, "grad_norm": 0.004685191067677087, "kl": 0.32373046875, "learning_rate": 3.904529853646584e-07, "loss": 0.00032337798620574176, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3856, "train_speed(iter/s)": 0.027454 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.0, "completions/mean_length": 96.90625381469727, "completions/min_length": 44.5, "epoch": 5.7475800446760985, "grad_norm": 0.003123309790336732, "kl": 0.28759765625, "learning_rate": 3.9022224907766997e-07, "loss": 0.00028720154659822583, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3857, "train_speed(iter/s)": 0.027457 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.25, "completions/mean_length": 95.29166984558105, "completions/min_length": 40.25, "epoch": 5.749069247952345, "grad_norm": 0.002880601248012258, "kl": 0.2900390625, "learning_rate": 3.8999153735000103e-07, "loss": 0.0002901284606195986, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3858, "train_speed(iter/s)": 0.027455 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.75, "completions/mean_length": 97.95833587646484, "completions/min_length": 35.75, "epoch": 5.750558451228593, "grad_norm": 0.0030015726821376464, "kl": 0.26953125, "learning_rate": 3.897608502332655e-07, "loss": 0.0002694114809855819, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3859, "train_speed(iter/s)": 0.027456 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.75, "completions/mean_length": 93.56250381469727, "completions/min_length": 33.0, "epoch": 5.75204765450484, "grad_norm": 0.003015435531682261, "kl": 0.272705078125, "learning_rate": 3.8953018777907275e-07, "loss": 0.0002725800732150674, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3860, "train_speed(iter/s)": 0.027456 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 199.75, "completions/mean_length": 91.19791984558105, "completions/min_length": 34.0, "epoch": 5.7535368577810875, "grad_norm": 0.7743678175751868, "kl": 0.272705078125, "learning_rate": 3.89299550039026e-07, "loss": 0.008362287655472755, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3861, "train_speed(iter/s)": 0.027455 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 237.0, "completions/mean_length": 95.87500190734863, "completions/min_length": 37.0, "epoch": 5.755026061057334, "grad_norm": 0.8973530901491926, "kl": 0.26611328125, "learning_rate": 3.890689370647234e-07, "loss": 0.023724032565951347, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3862, "train_speed(iter/s)": 0.027455 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 196.75, "completions/mean_length": 94.20833587646484, "completions/min_length": 39.25, "epoch": 5.756515264333581, "grad_norm": 0.0029405271784672914, "kl": 0.264892578125, "learning_rate": 3.888383489077575e-07, "loss": 0.000265018199570477, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3863, "train_speed(iter/s)": 0.027456 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.25, "completions/mean_length": 94.08333396911621, "completions/min_length": 36.25, "epoch": 5.758004467609829, "grad_norm": 0.0042999855576765925, "kl": 0.27001953125, "learning_rate": 3.8860778561971476e-07, "loss": 0.00027009364566765726, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3864, "train_speed(iter/s)": 0.027455 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.75, "completions/mean_length": 104.16666984558105, "completions/min_length": 44.5, "epoch": 5.759493670886076, "grad_norm": 1.8498106033177317, "kl": 0.28369140625, "learning_rate": 3.883772472521768e-07, "loss": 0.022205490618944168, "memory(GiB)": 112.53, "reward": 1.8437500298023224, "reward_std": 0.10661446116864681, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.29724033921957016, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3865, "train_speed(iter/s)": 0.027455 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.75, "completions/mean_length": 91.00000381469727, "completions/min_length": 36.25, "epoch": 5.760982874162323, "grad_norm": 1.2066968951982646, "kl": 0.3017578125, "learning_rate": 3.8814673385671893e-07, "loss": 0.020714232698082924, "memory(GiB)": 112.53, "reward": 1.9895833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9895833432674408, "rewards/CineAccuracyORM/std": 0.05103103443980217, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3866, "train_speed(iter/s)": 0.027453 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 265.0, "completions/mean_length": 93.85416793823242, "completions/min_length": 36.5, "epoch": 5.76247207743857, "grad_norm": 0.003140422249381686, "kl": 0.30078125, "learning_rate": 3.879162454849115e-07, "loss": 0.00030111579690128565, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3867, "train_speed(iter/s)": 0.027455 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 287.25, "completions/mean_length": 110.94791793823242, "completions/min_length": 36.25, "epoch": 5.763961280714818, "grad_norm": 1.7108326889679053, "kl": 0.247314453125, "learning_rate": 3.8768578218831925e-07, "loss": -0.012179659679532051, "memory(GiB)": 112.53, "reward": 1.6458333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6458333507180214, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3868, "train_speed(iter/s)": 0.027455 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.25, "completions/mean_length": 93.75000190734863, "completions/min_length": 32.75, "epoch": 5.7654504839910645, "grad_norm": 0.0031091786295594547, "kl": 0.2763671875, "learning_rate": 3.8745534401850074e-07, "loss": 0.00027695749304257333, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3869, "train_speed(iter/s)": 0.027454 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.25, "completions/mean_length": 94.83333587646484, "completions/min_length": 37.75, "epoch": 5.766939687267312, "grad_norm": 0.07312939991976972, "kl": 0.33056640625, "learning_rate": 3.872249310270096e-07, "loss": 0.0003298445080872625, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3870, "train_speed(iter/s)": 0.027454 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 266.0, "completions/mean_length": 105.22916984558105, "completions/min_length": 46.25, "epoch": 5.768428890543559, "grad_norm": 0.003014585860258055, "kl": 0.240234375, "learning_rate": 3.869945432653931e-07, "loss": 0.0002401881938567385, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3871, "train_speed(iter/s)": 0.027456 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 225.0, "completions/mean_length": 98.45833587646484, "completions/min_length": 52.0, "epoch": 5.769918093819807, "grad_norm": 0.0029803095861326272, "kl": 0.28369140625, "learning_rate": 3.867641807851935e-07, "loss": 0.00028309941990301013, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3872, "train_speed(iter/s)": 0.027453 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.5, "completions/mean_length": 96.82291793823242, "completions/min_length": 32.25, "epoch": 5.7714072970960535, "grad_norm": 0.003502526255639913, "kl": 0.291015625, "learning_rate": 3.865338436379472e-07, "loss": 0.000291072647087276, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3873, "train_speed(iter/s)": 0.027452 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.5, "completions/mean_length": 100.25000381469727, "completions/min_length": 43.5, "epoch": 5.772896500372301, "grad_norm": 1.0852702941862717, "kl": 0.26806640625, "learning_rate": 3.863035318751847e-07, "loss": -0.012642333284020424, "memory(GiB)": 112.53, "reward": 1.760416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166865348816, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3874, "train_speed(iter/s)": 0.027452 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 248.25, "completions/mean_length": 107.45833587646484, "completions/min_length": 39.75, "epoch": 5.774385703648548, "grad_norm": 0.588432262036097, "kl": 0.28125, "learning_rate": 3.860732455484313e-07, "loss": 0.009297192096710205, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833386108279, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3875, "train_speed(iter/s)": 0.027451 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 238.75, "completions/mean_length": 105.03125381469727, "completions/min_length": 42.0, "epoch": 5.775874906924795, "grad_norm": 0.0032906272934872497, "kl": 0.254150390625, "learning_rate": 3.8584298470920596e-07, "loss": 0.000253724108915776, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3876, "train_speed(iter/s)": 0.02745 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.75, "completions/mean_length": 103.60416793823242, "completions/min_length": 41.5, "epoch": 5.7773641102010425, "grad_norm": 0.002958961247214743, "kl": 0.26708984375, "learning_rate": 3.856127494090225e-07, "loss": 0.00026657950365915895, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3877, "train_speed(iter/s)": 0.027448 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 259.5, "completions/mean_length": 97.04166793823242, "completions/min_length": 37.75, "epoch": 5.77885331347729, "grad_norm": 0.0038388670059938757, "kl": 0.29833984375, "learning_rate": 3.8538253969938904e-07, "loss": 0.0002980623976327479, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3878, "train_speed(iter/s)": 0.027447 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 267.5, "completions/mean_length": 94.30208587646484, "completions/min_length": 28.25, "epoch": 5.780342516753537, "grad_norm": 0.8193399751649506, "kl": 0.3095703125, "learning_rate": 3.851523556318073e-07, "loss": 0.019286487251520157, "memory(GiB)": 112.53, "reward": 1.9895833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9895833432674408, "rewards/CineAccuracyORM/std": 0.05103103443980217, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3879, "train_speed(iter/s)": 0.027445 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 277.0, "completions/mean_length": 116.1562557220459, "completions/min_length": 39.5, "epoch": 5.781831720029784, "grad_norm": 0.04376697628699702, "kl": 0.307373046875, "learning_rate": 3.8492219725777416e-07, "loss": 0.0003065559430979192, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3880, "train_speed(iter/s)": 0.027445 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 266.75, "completions/mean_length": 107.57291793823242, "completions/min_length": 30.5, "epoch": 5.783320923306031, "grad_norm": 0.003225214327593538, "kl": 0.287109375, "learning_rate": 3.846920646287799e-07, "loss": 0.00028650378226302564, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3881, "train_speed(iter/s)": 0.027443 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 251.25, "completions/mean_length": 92.10416984558105, "completions/min_length": 34.75, "epoch": 5.784810126582278, "grad_norm": 0.004556218965264493, "kl": 0.29296875, "learning_rate": 3.8446195779630975e-07, "loss": 0.0002923743159044534, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3882, "train_speed(iter/s)": 0.027442 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 255.25, "completions/mean_length": 93.31250381469727, "completions/min_length": 38.25, "epoch": 5.786299329858526, "grad_norm": 1.6944979964786238, "kl": 0.288818359375, "learning_rate": 3.84231876811843e-07, "loss": 0.012787191197276115, "memory(GiB)": 112.53, "reward": 1.9791666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.9791666716337204, "rewards/CineAccuracyORM/std": 0.07058246433734894, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3883, "train_speed(iter/s)": 0.027442 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.75, "completions/mean_length": 81.21875, "completions/min_length": 35.75, "epoch": 5.787788533134773, "grad_norm": 0.005983397930170259, "kl": 0.328125, "learning_rate": 3.840018217268526e-07, "loss": 0.00032805983209982514, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3884, "train_speed(iter/s)": 0.027443 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 452.0, "completions/mean_length": 106.09375381469727, "completions/min_length": 29.75, "epoch": 5.78927773641102, "grad_norm": 1.6741879352626483, "kl": 0.296630859375, "learning_rate": 3.837717925928066e-07, "loss": 0.0856155976653099, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.13151744566857815, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.2783776558935642, "rewards/Format/mean": 0.9895833432674408, "rewards/Format/std": 0.05103103443980217, "step": 3885, "train_speed(iter/s)": 0.027438 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 283.75, "completions/mean_length": 101.30208396911621, "completions/min_length": 42.5, "epoch": 5.790766939687267, "grad_norm": 2.0040237255611135, "kl": 0.27587890625, "learning_rate": 3.8354178946116633e-07, "loss": 0.017824744805693626, "memory(GiB)": 112.53, "reward": 1.541666716337204, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.5416666865348816, "rewards/CineAccuracyORM/std": 0.48409245908260345, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3886, "train_speed(iter/s)": 0.027439 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.5, "completions/mean_length": 88.77083778381348, "completions/min_length": 33.0, "epoch": 5.792256142963515, "grad_norm": 1.5243872618029721, "kl": 0.31103515625, "learning_rate": 3.83311812383388e-07, "loss": 0.0062790522351861, "memory(GiB)": 112.53, "reward": 1.635416716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.635416679084301, "rewards/CineAccuracyORM/std": 0.4456155598163605, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3887, "train_speed(iter/s)": 0.027438 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 234.75, "completions/mean_length": 97.19791984558105, "completions/min_length": 32.5, "epoch": 5.793745346239762, "grad_norm": 0.0051491377739104815, "kl": 0.3017578125, "learning_rate": 3.8308186141092203e-07, "loss": 0.00030140989110805094, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3888, "train_speed(iter/s)": 0.027437 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.0, "completions/mean_length": 88.00000381469727, "completions/min_length": 28.75, "epoch": 5.7952345495160085, "grad_norm": 2.117923270667591, "kl": 0.31689453125, "learning_rate": 3.8285193659521217e-07, "loss": -0.004458198323845863, "memory(GiB)": 112.53, "reward": 1.541666716337204, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.5416666753590107, "rewards/CineAccuracyORM/std": 0.33594508469104767, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3889, "train_speed(iter/s)": 0.027435 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 184.5, "completions/mean_length": 82.97916984558105, "completions/min_length": 38.5, "epoch": 5.796723752792256, "grad_norm": 0.0029355953898280397, "kl": 0.28271484375, "learning_rate": 3.826220379876973e-07, "loss": 0.00028196058701723814, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3890, "train_speed(iter/s)": 0.027436 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 253.5, "completions/mean_length": 105.32291984558105, "completions/min_length": 36.25, "epoch": 5.798212956068504, "grad_norm": 1.2331853766052172, "kl": 0.2607421875, "learning_rate": 3.823921656398097e-07, "loss": 0.02138303779065609, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3891, "train_speed(iter/s)": 0.027438 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 251.75, "completions/mean_length": 95.51041984558105, "completions/min_length": 31.75, "epoch": 5.799702159344751, "grad_norm": 0.006418706748819228, "kl": 0.284423828125, "learning_rate": 3.821623196029762e-07, "loss": 0.0002838215441443026, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3892, "train_speed(iter/s)": 0.027432 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.25, "completions/mean_length": 88.61458587646484, "completions/min_length": 38.75, "epoch": 5.801191362620997, "grad_norm": 0.0031733941662384895, "kl": 0.283935546875, "learning_rate": 3.8193249992861767e-07, "loss": 0.000283389410469681, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3893, "train_speed(iter/s)": 0.027433 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.75, "completions/mean_length": 100.02083587646484, "completions/min_length": 35.75, "epoch": 5.802680565897245, "grad_norm": 0.0033214153288046075, "kl": 0.266357421875, "learning_rate": 3.817027066681489e-07, "loss": 0.00026642114971764386, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3894, "train_speed(iter/s)": 0.027431 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 199.75, "completions/mean_length": 90.21875190734863, "completions/min_length": 28.25, "epoch": 5.804169769173492, "grad_norm": 1.2581010068050837, "kl": 0.30419921875, "learning_rate": 3.814729398729792e-07, "loss": -0.0021388207096606493, "memory(GiB)": 112.53, "reward": 1.5937500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500102445483, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3895, "train_speed(iter/s)": 0.027432 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.75, "completions/mean_length": 90.11458396911621, "completions/min_length": 32.5, "epoch": 5.80565897244974, "grad_norm": 0.0034740740239240227, "kl": 0.30908203125, "learning_rate": 3.812431995945113e-07, "loss": 0.0003091199032496661, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3896, "train_speed(iter/s)": 0.027432 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 195.75, "completions/mean_length": 84.56250381469727, "completions/min_length": 32.75, "epoch": 5.807148175725986, "grad_norm": 1.0481816487604902, "kl": 0.328125, "learning_rate": 3.810134858841425e-07, "loss": 0.004833896644413471, "memory(GiB)": 112.53, "reward": 1.7187500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500149011612, "rewards/CineAccuracyORM/std": 0.4456155598163605, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3897, "train_speed(iter/s)": 0.027434 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 263.0, "completions/mean_length": 89.53125190734863, "completions/min_length": 38.5, "epoch": 5.808637379002234, "grad_norm": 1.1531551181193507, "kl": 0.3427734375, "learning_rate": 3.8078379879326415e-07, "loss": 5.911892367294058e-05, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3898, "train_speed(iter/s)": 0.027435 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 199.0, "completions/mean_length": 85.76041984558105, "completions/min_length": 35.0, "epoch": 5.810126582278481, "grad_norm": 0.003557713352721562, "kl": 0.32763671875, "learning_rate": 3.805541383732613e-07, "loss": 0.0003272265603300184, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3899, "train_speed(iter/s)": 0.027437 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 254.5, "completions/mean_length": 98.47916984558105, "completions/min_length": 38.5, "epoch": 5.811615785554729, "grad_norm": 0.003023080517676318, "kl": 0.266845703125, "learning_rate": 3.8032450467551337e-07, "loss": 0.0002669412351679057, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3900, "train_speed(iter/s)": 0.027437 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 264.5, "completions/mean_length": 103.83333587646484, "completions/min_length": 31.0, "epoch": 5.813104988830975, "grad_norm": 0.0039394486980445596, "kl": 0.256103515625, "learning_rate": 3.800948977513938e-07, "loss": 0.000256103667197749, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3901, "train_speed(iter/s)": 0.027438 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.25, "completions/mean_length": 96.38541793823242, "completions/min_length": 34.25, "epoch": 5.814594192107222, "grad_norm": 0.0038214923446788208, "kl": 0.28759765625, "learning_rate": 3.798653176522696e-07, "loss": 0.00028795201797038317, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3902, "train_speed(iter/s)": 0.027439 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 281.0, "completions/mean_length": 95.69791984558105, "completions/min_length": 34.25, "epoch": 5.81608339538347, "grad_norm": 0.0034497877824875246, "kl": 0.29931640625, "learning_rate": 3.796357644295025e-07, "loss": 0.0002992452646140009, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3903, "train_speed(iter/s)": 0.027438 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.75, "completions/mean_length": 82.18750286102295, "completions/min_length": 33.25, "epoch": 5.8175725986597175, "grad_norm": 0.0036360771468255068, "kl": 0.33642578125, "learning_rate": 3.7940623813444736e-07, "loss": 0.00033684479421935976, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3904, "train_speed(iter/s)": 0.027436 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 206.0, "completions/mean_length": 85.13541984558105, "completions/min_length": 27.0, "epoch": 5.819061801935964, "grad_norm": 1.145590266741032, "kl": 0.306640625, "learning_rate": 3.791767388184537e-07, "loss": -0.006505511235445738, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3905, "train_speed(iter/s)": 0.027434 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.75, "completions/mean_length": 93.09375190734863, "completions/min_length": 26.0, "epoch": 5.820551005212211, "grad_norm": 0.005074504641884703, "kl": 0.2978515625, "learning_rate": 3.789472665328649e-07, "loss": 0.0002976678078994155, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3906, "train_speed(iter/s)": 0.027433 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 309.0, "completions/mean_length": 90.20833587646484, "completions/min_length": 26.5, "epoch": 5.822040208488459, "grad_norm": 0.002865947246791333, "kl": 0.2890625, "learning_rate": 3.787178213290179e-07, "loss": 0.0002894751087296754, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3907, "train_speed(iter/s)": 0.027435 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.75, "completions/mean_length": 92.46875190734863, "completions/min_length": 40.0, "epoch": 5.823529411764706, "grad_norm": 0.003048724962218847, "kl": 0.27587890625, "learning_rate": 3.784884032582442e-07, "loss": 0.0002757144393399358, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3908, "train_speed(iter/s)": 0.027433 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 313.25, "completions/mean_length": 90.46875381469727, "completions/min_length": 34.5, "epoch": 5.825018615040953, "grad_norm": 0.003277478107650195, "kl": 0.3076171875, "learning_rate": 3.7825901237186844e-07, "loss": 0.0003070707607548684, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3909, "train_speed(iter/s)": 0.027433 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 275.75, "completions/mean_length": 104.54166984558105, "completions/min_length": 39.75, "epoch": 5.8265078183172, "grad_norm": 0.6797929985779924, "kl": 0.27734375, "learning_rate": 3.780296487212099e-07, "loss": -0.0004610639298334718, "memory(GiB)": 112.53, "reward": 1.7604166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3910, "train_speed(iter/s)": 0.02743 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.75, "completions/mean_length": 82.44791984558105, "completions/min_length": 30.0, "epoch": 5.827997021593448, "grad_norm": 0.0033770612778313692, "kl": 0.330078125, "learning_rate": 3.778003123575815e-07, "loss": 0.0003293000627309084, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3911, "train_speed(iter/s)": 0.027431 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 315.5, "completions/mean_length": 97.04166793823242, "completions/min_length": 31.0, "epoch": 5.829486224869695, "grad_norm": 0.0031781362932982544, "kl": 0.27734375, "learning_rate": 3.7757100333228986e-07, "loss": 0.00027725426480174065, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3912, "train_speed(iter/s)": 0.027432 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.75, "completions/mean_length": 96.43750190734863, "completions/min_length": 30.5, "epoch": 5.830975428145942, "grad_norm": 0.0033239307224286565, "kl": 0.27392578125, "learning_rate": 3.773417216966359e-07, "loss": 0.0002736769092734903, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3913, "train_speed(iter/s)": 0.027433 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 312.75, "completions/mean_length": 97.02083587646484, "completions/min_length": 38.25, "epoch": 5.832464631422189, "grad_norm": 1.8666273660964605, "kl": 0.27490234375, "learning_rate": 3.7711246750191395e-07, "loss": 0.007098916452378035, "memory(GiB)": 112.53, "reward": 1.8645833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8645833432674408, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3914, "train_speed(iter/s)": 0.027432 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 272.75, "completions/mean_length": 88.76041984558105, "completions/min_length": 26.0, "epoch": 5.833953834698436, "grad_norm": 0.0038373824162338463, "kl": 0.32958984375, "learning_rate": 3.768832407994124e-07, "loss": 0.00032990885665640235, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3915, "train_speed(iter/s)": 0.027429 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 225.0, "completions/mean_length": 88.81250190734863, "completions/min_length": 36.75, "epoch": 5.8354430379746836, "grad_norm": 0.0031830574180758417, "kl": 0.2861328125, "learning_rate": 3.7665404164041395e-07, "loss": 0.00028620645753107965, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3916, "train_speed(iter/s)": 0.027429 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.25, "completions/mean_length": 92.46875190734863, "completions/min_length": 28.5, "epoch": 5.836932241250931, "grad_norm": 0.003127419336658952, "kl": 0.28564453125, "learning_rate": 3.764248700761941e-07, "loss": 0.00028524926165118814, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3917, "train_speed(iter/s)": 0.027431 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 305.25, "completions/mean_length": 102.00000190734863, "completions/min_length": 30.0, "epoch": 5.838421444527178, "grad_norm": 0.01082121514208666, "kl": 0.28564453125, "learning_rate": 3.761957261580232e-07, "loss": 0.0002855782804545015, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3918, "train_speed(iter/s)": 0.027431 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 253.25, "completions/mean_length": 102.80208778381348, "completions/min_length": 35.25, "epoch": 5.839910647803425, "grad_norm": 0.003135354136957241, "kl": 0.273681640625, "learning_rate": 3.7596660993716475e-07, "loss": 0.000273519370239228, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3919, "train_speed(iter/s)": 0.027429 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.75, "completions/mean_length": 81.87500381469727, "completions/min_length": 31.5, "epoch": 5.8413998510796725, "grad_norm": 0.0038890214572010373, "kl": 0.3037109375, "learning_rate": 3.7573752146487636e-07, "loss": 0.00030397085356526077, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3920, "train_speed(iter/s)": 0.027431 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 223.5, "completions/mean_length": 91.42708778381348, "completions/min_length": 42.5, "epoch": 5.842889054355919, "grad_norm": 0.003406102495457546, "kl": 0.296875, "learning_rate": 3.7550846079240973e-07, "loss": 0.00029664600151591003, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3921, "train_speed(iter/s)": 0.027433 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 275.5, "completions/mean_length": 102.10416793823242, "completions/min_length": 41.0, "epoch": 5.844378257632167, "grad_norm": 1.2867304114496445, "kl": 0.25732421875, "learning_rate": 3.7527942797100934e-07, "loss": 0.023292869329452515, "memory(GiB)": 112.53, "reward": 1.822916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3922, "train_speed(iter/s)": 0.027431 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.0, "completions/mean_length": 90.58333587646484, "completions/min_length": 38.25, "epoch": 5.845867460908414, "grad_norm": 0.0033826087511199692, "kl": 0.29638671875, "learning_rate": 3.750504230519146e-07, "loss": 0.000296334910672158, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3923, "train_speed(iter/s)": 0.02743 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.75, "completions/mean_length": 88.59375190734863, "completions/min_length": 32.5, "epoch": 5.8473566641846615, "grad_norm": 0.9855756843647312, "kl": 0.28515625, "learning_rate": 3.748214460863578e-07, "loss": 0.012178601697087288, "memory(GiB)": 112.53, "reward": 1.9270833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9270833432674408, "rewards/CineAccuracyORM/std": 0.11607640981674194, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3924, "train_speed(iter/s)": 0.027432 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 259.25, "completions/mean_length": 90.66667175292969, "completions/min_length": 39.0, "epoch": 5.848845867460908, "grad_norm": 0.0034901859223843644, "kl": 0.29443359375, "learning_rate": 3.7459249712556557e-07, "loss": 0.0002945346350315958, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3925, "train_speed(iter/s)": 0.02743 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 284.25, "completions/mean_length": 98.88541984558105, "completions/min_length": 30.25, "epoch": 5.850335070737156, "grad_norm": 0.003433584857687546, "kl": 0.2685546875, "learning_rate": 3.7436357622075815e-07, "loss": 0.0002683031198102981, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3926, "train_speed(iter/s)": 0.02743 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 201.5, "completions/mean_length": 93.85416984558105, "completions/min_length": 33.0, "epoch": 5.851824274013403, "grad_norm": 0.0029534155832625256, "kl": 0.2666015625, "learning_rate": 3.7413468342314904e-07, "loss": 0.0002663064224179834, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3927, "train_speed(iter/s)": 0.027432 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.25, "completions/mean_length": 77.15625190734863, "completions/min_length": 37.0, "epoch": 5.85331347728965, "grad_norm": 0.0031798107025517103, "kl": 0.330078125, "learning_rate": 3.739058187839461e-07, "loss": 0.00032948373700492084, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3928, "train_speed(iter/s)": 0.027435 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.5, "completions/mean_length": 93.66666984558105, "completions/min_length": 33.5, "epoch": 5.854802680565897, "grad_norm": 0.0030709428639171207, "kl": 0.292724609375, "learning_rate": 3.736769823543503e-07, "loss": 0.00029235833790153265, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3929, "train_speed(iter/s)": 0.027433 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 252.5, "completions/mean_length": 96.65625381469727, "completions/min_length": 34.0, "epoch": 5.856291883842145, "grad_norm": 0.003265628370545442, "kl": 0.27685546875, "learning_rate": 3.7344817418555685e-07, "loss": 0.00027679605409502983, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3930, "train_speed(iter/s)": 0.027431 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.75, "completions/mean_length": 100.67708778381348, "completions/min_length": 36.25, "epoch": 5.857781087118392, "grad_norm": 1.4160683647631396, "kl": 0.25634765625, "learning_rate": 3.732193943287543e-07, "loss": -0.03129998594522476, "memory(GiB)": 112.53, "reward": 1.8645833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8645833432674408, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3931, "train_speed(iter/s)": 0.02743 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 223.25, "completions/mean_length": 92.53125190734863, "completions/min_length": 38.25, "epoch": 5.8592702903946385, "grad_norm": 0.003984599300788662, "kl": 0.27978515625, "learning_rate": 3.7299064283512494e-07, "loss": 0.00028022294281981885, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3932, "train_speed(iter/s)": 0.027429 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.25, "completions/mean_length": 87.83333587646484, "completions/min_length": 29.5, "epoch": 5.860759493670886, "grad_norm": 1.0477900442220607, "kl": 0.3076171875, "learning_rate": 3.7276191975584486e-07, "loss": 0.0011842878302559257, "memory(GiB)": 112.53, "reward": 1.8437500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3933, "train_speed(iter/s)": 0.027429 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.5, "completions/mean_length": 81.97916984558105, "completions/min_length": 36.0, "epoch": 5.862248696947133, "grad_norm": 0.028240814465538642, "kl": 0.32568359375, "learning_rate": 3.7253322514208333e-07, "loss": 0.00032560958061367273, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3934, "train_speed(iter/s)": 0.027428 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 247.5, "completions/mean_length": 103.59375190734863, "completions/min_length": 38.0, "epoch": 5.863737900223381, "grad_norm": 0.003232980225997885, "kl": 0.27099609375, "learning_rate": 3.723045590450038e-07, "loss": 0.0002706647792365402, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3935, "train_speed(iter/s)": 0.027427 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.0, "completions/mean_length": 90.92708778381348, "completions/min_length": 34.25, "epoch": 5.8652271034996275, "grad_norm": 1.4483979133770941, "kl": 0.3173828125, "learning_rate": 3.720759215157632e-07, "loss": 0.009442346170544624, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3936, "train_speed(iter/s)": 0.027429 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.75, "completions/mean_length": 77.03125190734863, "completions/min_length": 33.0, "epoch": 5.866716306775875, "grad_norm": 0.00300543759960054, "kl": 0.31494140625, "learning_rate": 3.718473126055117e-07, "loss": 0.0003148158430121839, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3937, "train_speed(iter/s)": 0.027429 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.75, "completions/mean_length": 76.90625381469727, "completions/min_length": 35.0, "epoch": 5.868205510052122, "grad_norm": 0.0037204428823414478, "kl": 0.32421875, "learning_rate": 3.7161873236539386e-07, "loss": 0.0003237687924411148, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3938, "train_speed(iter/s)": 0.02743 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.5, "completions/mean_length": 78.05208587646484, "completions/min_length": 25.5, "epoch": 5.86969471332837, "grad_norm": 0.0032032415909687556, "kl": 0.34228515625, "learning_rate": 3.713901808465467e-07, "loss": 0.0003422479494474828, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3939, "train_speed(iter/s)": 0.027429 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.25, "completions/mean_length": 81.75000095367432, "completions/min_length": 27.25, "epoch": 5.8711839166046165, "grad_norm": 0.007063505146906251, "kl": 0.34765625, "learning_rate": 3.7116165810010175e-07, "loss": 0.0003471525851637125, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3940, "train_speed(iter/s)": 0.027427 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.25, "completions/mean_length": 92.09375190734863, "completions/min_length": 31.75, "epoch": 5.872673119880863, "grad_norm": 1.3086281831091389, "kl": 0.28076171875, "learning_rate": 3.7093316417718404e-07, "loss": 0.0028691531624644995, "memory(GiB)": 112.53, "reward": 1.760416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166865348816, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3941, "train_speed(iter/s)": 0.027425 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 276.0, "completions/mean_length": 96.23958587646484, "completions/min_length": 32.0, "epoch": 5.874162323157111, "grad_norm": 0.004022188384134309, "kl": 0.3076171875, "learning_rate": 3.7070469912891127e-07, "loss": 0.0003076601424254477, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3942, "train_speed(iter/s)": 0.027425 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 254.75, "completions/mean_length": 88.97916984558105, "completions/min_length": 31.0, "epoch": 5.875651526433359, "grad_norm": 1.933136373788353, "kl": 0.32275390625, "learning_rate": 3.7047626300639586e-07, "loss": 0.007019279059022665, "memory(GiB)": 112.53, "reward": 1.7291666865348816, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.729166679084301, "rewards/CineAccuracyORM/std": 0.32092025876045227, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3943, "train_speed(iter/s)": 0.027421 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.5, "completions/mean_length": 91.83333587646484, "completions/min_length": 30.5, "epoch": 5.877140729709605, "grad_norm": 0.0034896196378158555, "kl": 0.31396484375, "learning_rate": 3.702478558607429e-07, "loss": 0.0003138649044558406, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3944, "train_speed(iter/s)": 0.027424 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 214.75, "completions/mean_length": 82.28125381469727, "completions/min_length": 34.0, "epoch": 5.878629932985852, "grad_norm": 1.5885252165116646, "kl": 0.307373046875, "learning_rate": 3.700194777430513e-07, "loss": 0.006632120348513126, "memory(GiB)": 112.53, "reward": 1.8125000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3945, "train_speed(iter/s)": 0.027424 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 255.75, "completions/mean_length": 88.20833396911621, "completions/min_length": 33.0, "epoch": 5.8801191362621, "grad_norm": 1.4520631311199113, "kl": 0.2822265625, "learning_rate": 3.69791128704414e-07, "loss": 0.025243273004889488, "memory(GiB)": 112.53, "reward": 1.7083333730697632, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7083333432674408, "rewards/CineAccuracyORM/std": 0.36846019327640533, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3946, "train_speed(iter/s)": 0.027424 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 247.0, "completions/mean_length": 85.33333587646484, "completions/min_length": 33.75, "epoch": 5.881608339538347, "grad_norm": 0.01977254384474968, "kl": 0.30517578125, "learning_rate": 3.6956280879591615e-07, "loss": 0.00030535776750184596, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3947, "train_speed(iter/s)": 0.027424 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.0, "completions/mean_length": 80.51041984558105, "completions/min_length": 30.0, "epoch": 5.883097542814594, "grad_norm": 0.004157299368808261, "kl": 0.3349609375, "learning_rate": 3.693345180686377e-07, "loss": 0.0003352318308316171, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3948, "train_speed(iter/s)": 0.027425 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.0, "completions/mean_length": 80.11458587646484, "completions/min_length": 31.25, "epoch": 5.884586746090841, "grad_norm": 0.0035488135616518, "kl": 0.30859375, "learning_rate": 3.6910625657365113e-07, "loss": 0.0003083139890804887, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3949, "train_speed(iter/s)": 0.027424 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.5, "completions/mean_length": 77.54166793823242, "completions/min_length": 29.0, "epoch": 5.886075949367089, "grad_norm": 0.026032317946858227, "kl": 0.353515625, "learning_rate": 3.6887802436202303e-07, "loss": 0.000353448063833639, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3950, "train_speed(iter/s)": 0.027426 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 243.0, "completions/mean_length": 88.31250190734863, "completions/min_length": 28.0, "epoch": 5.887565152643336, "grad_norm": 0.0033597585157201233, "kl": 0.27294921875, "learning_rate": 3.686498214848134e-07, "loss": 0.00027279986534267664, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3951, "train_speed(iter/s)": 0.027426 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.5, "completions/mean_length": 76.28125286102295, "completions/min_length": 31.5, "epoch": 5.889054355919583, "grad_norm": 0.0034979511685261405, "kl": 0.34375, "learning_rate": 3.684216479930748e-07, "loss": 0.00034448207588866353, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3952, "train_speed(iter/s)": 0.027424 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.0, "completions/mean_length": 67.82291793823242, "completions/min_length": 26.25, "epoch": 5.89054355919583, "grad_norm": 0.0030612927880709026, "kl": 0.337890625, "learning_rate": 3.6819350393785443e-07, "loss": 0.0003375906671863049, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3953, "train_speed(iter/s)": 0.027424 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.25, "completions/mean_length": 82.03125190734863, "completions/min_length": 34.75, "epoch": 5.892032762472077, "grad_norm": 0.003225623546102024, "kl": 0.29638671875, "learning_rate": 3.679653893701923e-07, "loss": 0.0002964088344015181, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3954, "train_speed(iter/s)": 0.027425 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.0, "completions/mean_length": 80.07291984558105, "completions/min_length": 28.5, "epoch": 5.893521965748325, "grad_norm": 0.0031044133739681415, "kl": 0.3251953125, "learning_rate": 3.677373043411215e-07, "loss": 0.00032490130979567766, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3955, "train_speed(iter/s)": 0.027425 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.0, "completions/mean_length": 82.78125381469727, "completions/min_length": 31.25, "epoch": 5.895011169024572, "grad_norm": 0.0030150527584922563, "kl": 0.2880859375, "learning_rate": 3.6750924890166923e-07, "loss": 0.0002880502142943442, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3956, "train_speed(iter/s)": 0.027426 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 132.0, "completions/mean_length": 68.94791889190674, "completions/min_length": 30.5, "epoch": 5.896500372300819, "grad_norm": 0.7734458169512702, "kl": 0.33544921875, "learning_rate": 3.6728122310285546e-07, "loss": -0.01573048159480095, "memory(GiB)": 112.53, "reward": 1.822916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3957, "train_speed(iter/s)": 0.027429 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 202.5, "completions/mean_length": 79.16666889190674, "completions/min_length": 26.75, "epoch": 5.897989575577066, "grad_norm": 0.9298543824634234, "kl": 0.3505859375, "learning_rate": 3.67053226995694e-07, "loss": 0.023794211447238922, "memory(GiB)": 112.53, "reward": 1.8958333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3958, "train_speed(iter/s)": 0.027428 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 214.0, "completions/mean_length": 80.06250190734863, "completions/min_length": 29.5, "epoch": 5.899478778853314, "grad_norm": 0.003613906027688273, "kl": 0.32470703125, "learning_rate": 3.6682526063119203e-07, "loss": 0.00032435578759759665, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3959, "train_speed(iter/s)": 0.027427 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 179.5, "completions/mean_length": 78.35416793823242, "completions/min_length": 23.5, "epoch": 5.90096798212956, "grad_norm": 1.5121840963966349, "kl": 0.4150390625, "learning_rate": 3.665973240603493e-07, "loss": -0.012657745741307735, "memory(GiB)": 112.53, "reward": 1.7083334028720856, "reward_std": 0.07715167850255966, "rewards/CineAccuracyORM/mean": 0.708333358168602, "rewards/CineAccuracyORM/std": 0.43725670874118805, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3960, "train_speed(iter/s)": 0.027426 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.25, "completions/mean_length": 84.55208587646484, "completions/min_length": 32.75, "epoch": 5.902457185405808, "grad_norm": 0.0034458599295392454, "kl": 0.3291015625, "learning_rate": 3.6636941733415976e-07, "loss": 0.00032867860863916576, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3961, "train_speed(iter/s)": 0.027425 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.0, "completions/mean_length": 87.34375381469727, "completions/min_length": 33.25, "epoch": 5.903946388682055, "grad_norm": 1.684584049047344, "kl": 0.306640625, "learning_rate": 3.661415405036103e-07, "loss": 0.01885862648487091, "memory(GiB)": 112.53, "reward": 1.46875, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.46875, "rewards/CineAccuracyORM/std": 0.08445799350738525, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3962, "train_speed(iter/s)": 0.02742 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.5, "completions/mean_length": 75.58333587646484, "completions/min_length": 22.0, "epoch": 5.905435591958303, "grad_norm": 0.003280997316549587, "kl": 0.34912109375, "learning_rate": 3.6591369361968123e-07, "loss": 0.0003492634859867394, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3963, "train_speed(iter/s)": 0.027419 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 179.0, "completions/mean_length": 77.73958396911621, "completions/min_length": 29.5, "epoch": 5.906924795234549, "grad_norm": 0.0036671741926192613, "kl": 0.32470703125, "learning_rate": 3.6568587673334626e-07, "loss": 0.00032483565155416727, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3964, "train_speed(iter/s)": 0.027418 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.0, "completions/mean_length": 86.46875190734863, "completions/min_length": 35.75, "epoch": 5.908413998510797, "grad_norm": 1.2250975026880817, "kl": 0.29736328125, "learning_rate": 3.65458089895572e-07, "loss": -0.0003781797713600099, "memory(GiB)": 112.53, "reward": 1.6875000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000055879354, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3965, "train_speed(iter/s)": 0.027419 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.5, "completions/mean_length": 82.80208587646484, "completions/min_length": 23.25, "epoch": 5.909903201787044, "grad_norm": 0.0035997581613778167, "kl": 0.30224609375, "learning_rate": 3.652303331573189e-07, "loss": 0.00030180427711457014, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3966, "train_speed(iter/s)": 0.027421 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 140.25, "completions/mean_length": 70.85416984558105, "completions/min_length": 22.0, "epoch": 5.911392405063291, "grad_norm": 1.4218221788692547, "kl": 0.31201171875, "learning_rate": 3.6500260656953995e-07, "loss": -0.0019537003245204687, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.822916679084301, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3967, "train_speed(iter/s)": 0.02742 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.5, "completions/mean_length": 84.90625190734863, "completions/min_length": 26.5, "epoch": 5.912881608339538, "grad_norm": 0.9419144343616466, "kl": 0.30029296875, "learning_rate": 3.6477491018318207e-07, "loss": 0.005016203969717026, "memory(GiB)": 112.53, "reward": 1.822916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3968, "train_speed(iter/s)": 0.027417 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.5, "completions/mean_length": 82.08333587646484, "completions/min_length": 25.75, "epoch": 5.914370811615786, "grad_norm": 0.9401898661210042, "kl": 0.33935546875, "learning_rate": 3.6454724404918517e-07, "loss": -0.012025995180010796, "memory(GiB)": 112.53, "reward": 1.8020833432674408, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.20484384894371033, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3969, "train_speed(iter/s)": 0.027418 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.25, "completions/mean_length": 81.25000381469727, "completions/min_length": 31.0, "epoch": 5.915860014892033, "grad_norm": 0.004799482608160019, "kl": 0.296875, "learning_rate": 3.643196082184823e-07, "loss": 0.00029732479015365243, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3970, "train_speed(iter/s)": 0.027418 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.25, "completions/mean_length": 84.51041984558105, "completions/min_length": 30.25, "epoch": 5.91734921816828, "grad_norm": 0.0035188629143884155, "kl": 0.31640625, "learning_rate": 3.6409200274200005e-07, "loss": 0.00031637796200811863, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3971, "train_speed(iter/s)": 0.027415 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.75, "completions/mean_length": 68.85416889190674, "completions/min_length": 21.25, "epoch": 5.918838421444527, "grad_norm": 0.0040165297126492265, "kl": 0.36962890625, "learning_rate": 3.638644276706576e-07, "loss": 0.0003696987987495959, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3972, "train_speed(iter/s)": 0.027414 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.5, "completions/mean_length": 73.01041793823242, "completions/min_length": 33.25, "epoch": 5.920327624720774, "grad_norm": 1.651778446867769, "kl": 2.47314453125, "learning_rate": 3.6363688305536786e-07, "loss": -0.0225679874420166, "memory(GiB)": 112.53, "reward": 1.6458333730697632, "reward_std": 0.08311937749385834, "rewards/CineAccuracyORM/mean": 0.6458333432674408, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3973, "train_speed(iter/s)": 0.027415 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.0, "completions/mean_length": 72.53125190734863, "completions/min_length": 29.25, "epoch": 5.921816827997022, "grad_norm": 1.3991986660373652, "kl": 0.34423828125, "learning_rate": 3.6340936894703713e-07, "loss": -0.0020367398392409086, "memory(GiB)": 112.53, "reward": 1.8645833432674408, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.12724433839321136, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3974, "train_speed(iter/s)": 0.027414 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 185.5, "completions/mean_length": 72.10416984558105, "completions/min_length": 28.0, "epoch": 5.923306031273269, "grad_norm": 0.0033735382610442727, "kl": 0.32666015625, "learning_rate": 3.6318188539656395e-07, "loss": 0.00032652285881340504, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3975, "train_speed(iter/s)": 0.027416 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.5, "completions/mean_length": 83.86458396911621, "completions/min_length": 28.75, "epoch": 5.924795234549516, "grad_norm": 0.003437045341224601, "kl": 0.3046875, "learning_rate": 3.629544324548412e-07, "loss": 0.0003050158265978098, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3976, "train_speed(iter/s)": 0.027415 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.25, "completions/mean_length": 75.11458587646484, "completions/min_length": 24.5, "epoch": 5.926284437825763, "grad_norm": 0.003603133759696932, "kl": 0.318359375, "learning_rate": 3.627270101727538e-07, "loss": 0.000318798964144662, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3977, "train_speed(iter/s)": 0.027418 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.75, "completions/mean_length": 81.51041984558105, "completions/min_length": 26.25, "epoch": 5.927773641102011, "grad_norm": 0.0033472822538541547, "kl": 0.30322265625, "learning_rate": 3.6249961860118054e-07, "loss": 0.00030311045702546835, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3978, "train_speed(iter/s)": 0.02742 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 199.0, "completions/mean_length": 81.44791793823242, "completions/min_length": 41.5, "epoch": 5.929262844378258, "grad_norm": 0.0034345915372827724, "kl": 0.314453125, "learning_rate": 3.622722577909934e-07, "loss": 0.00031474712886847556, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3979, "train_speed(iter/s)": 0.027421 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.5, "completions/mean_length": 76.95833587646484, "completions/min_length": 27.25, "epoch": 5.930752047654504, "grad_norm": 1.6068997453515075, "kl": 0.353515625, "learning_rate": 3.6204492779305674e-07, "loss": -0.0032660262659192085, "memory(GiB)": 112.53, "reward": 1.6458334028720856, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.645833358168602, "rewards/CineAccuracyORM/std": 0.4870600998401642, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3980, "train_speed(iter/s)": 0.027418 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.25, "completions/mean_length": 71.50000190734863, "completions/min_length": 28.75, "epoch": 5.932241250930752, "grad_norm": 2.1971703013399986, "kl": 0.359375, "learning_rate": 3.6181762865822884e-07, "loss": -0.004891776479780674, "memory(GiB)": 112.53, "reward": 1.604166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.604166679084301, "rewards/CineAccuracyORM/std": 0.4717390313744545, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3981, "train_speed(iter/s)": 0.027418 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 204.25, "completions/mean_length": 78.26041793823242, "completions/min_length": 28.75, "epoch": 5.933730454207, "grad_norm": 0.7728379294263669, "kl": 0.32080078125, "learning_rate": 3.615903604373605e-07, "loss": -0.005785677116364241, "memory(GiB)": 112.53, "reward": 1.6770833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833358168602, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3982, "train_speed(iter/s)": 0.027418 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.5, "completions/mean_length": 83.37500381469727, "completions/min_length": 31.5, "epoch": 5.9352196574832465, "grad_norm": 0.003125898147546116, "kl": 0.32275390625, "learning_rate": 3.6136312318129597e-07, "loss": 0.00032281538005918264, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3983, "train_speed(iter/s)": 0.02742 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.75, "completions/mean_length": 80.86458587646484, "completions/min_length": 27.0, "epoch": 5.936708860759493, "grad_norm": 0.025324174640269124, "kl": 0.32080078125, "learning_rate": 3.6113591694087263e-07, "loss": 0.0003211962175555527, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3984, "train_speed(iter/s)": 0.027421 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.25, "completions/mean_length": 75.86458683013916, "completions/min_length": 35.5, "epoch": 5.938198064035741, "grad_norm": 0.003185059735844237, "kl": 0.3212890625, "learning_rate": 3.609087417669203e-07, "loss": 0.0003208933339919895, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3985, "train_speed(iter/s)": 0.027418 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.75, "completions/mean_length": 78.59375190734863, "completions/min_length": 29.0, "epoch": 5.939687267311988, "grad_norm": 0.0031738357183121457, "kl": 0.33642578125, "learning_rate": 3.6068159771026264e-07, "loss": 0.0003360914997756481, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3986, "train_speed(iter/s)": 0.027417 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.25, "completions/mean_length": 68.05208492279053, "completions/min_length": 22.5, "epoch": 5.9411764705882355, "grad_norm": 2.587123674928888, "kl": 0.3544921875, "learning_rate": 3.604544848217157e-07, "loss": -0.003545768093317747, "memory(GiB)": 112.53, "reward": 1.8854166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.20484384894371033, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3987, "train_speed(iter/s)": 0.02742 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.5, "completions/mean_length": 84.15625190734863, "completions/min_length": 35.5, "epoch": 5.942665673864482, "grad_norm": 1.2111347023031849, "kl": 0.310546875, "learning_rate": 3.60227403152089e-07, "loss": 0.020246217027306557, "memory(GiB)": 112.53, "reward": 1.9479166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.9479166716337204, "rewards/CineAccuracyORM/std": 0.1037127822637558, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3988, "train_speed(iter/s)": 0.02742 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.75, "completions/mean_length": 79.38541889190674, "completions/min_length": 32.75, "epoch": 5.94415487714073, "grad_norm": 0.00404380447241319, "kl": 0.32421875, "learning_rate": 3.600003527521851e-07, "loss": 0.0003237011260353029, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3989, "train_speed(iter/s)": 0.02742 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.5, "completions/mean_length": 91.10416793823242, "completions/min_length": 26.5, "epoch": 5.945644080416977, "grad_norm": 0.0035779420915900864, "kl": 0.279052734375, "learning_rate": 3.5977333367279903e-07, "loss": 0.0002791562583297491, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3990, "train_speed(iter/s)": 0.027422 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 191.5, "completions/mean_length": 86.07291984558105, "completions/min_length": 25.25, "epoch": 5.9471332836932245, "grad_norm": 0.003058603628249398, "kl": 0.2666015625, "learning_rate": 3.5954634596471954e-07, "loss": 0.00026674760738387704, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3991, "train_speed(iter/s)": 0.027424 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.5, "completions/mean_length": 80.15625190734863, "completions/min_length": 26.75, "epoch": 5.948622486969471, "grad_norm": 0.002993773701764081, "kl": 0.33349609375, "learning_rate": 3.593193896787277e-07, "loss": 0.0003337882808409631, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3992, "train_speed(iter/s)": 0.027423 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.5, "completions/mean_length": 76.27083587646484, "completions/min_length": 28.25, "epoch": 5.950111690245718, "grad_norm": 1.2009027062392095, "kl": 0.3310546875, "learning_rate": 3.5909246486559775e-07, "loss": -0.012406076304614544, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833507180214, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3993, "train_speed(iter/s)": 0.027422 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.75, "completions/mean_length": 80.26041984558105, "completions/min_length": 38.25, "epoch": 5.951600893521966, "grad_norm": 0.003438126924098469, "kl": 0.32470703125, "learning_rate": 3.5886557157609735e-07, "loss": 0.00032492156606167555, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3994, "train_speed(iter/s)": 0.027419 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.75, "completions/mean_length": 79.79166984558105, "completions/min_length": 35.25, "epoch": 5.953090096798213, "grad_norm": 0.003245342883214095, "kl": 0.32177734375, "learning_rate": 3.586387098609865e-07, "loss": 0.00032176129752770066, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3995, "train_speed(iter/s)": 0.027416 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.0, "completions/mean_length": 80.29167175292969, "completions/min_length": 31.25, "epoch": 5.95457930007446, "grad_norm": 1.6359317755040368, "kl": 0.2998046875, "learning_rate": 3.584118797710186e-07, "loss": -0.008188937790691853, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.6770833432674408, "rewards/CineAccuracyORM/std": 0.3470437303185463, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3996, "train_speed(iter/s)": 0.027415 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.75, "completions/mean_length": 85.77083587646484, "completions/min_length": 36.25, "epoch": 5.956068503350707, "grad_norm": 0.0033994618596092582, "kl": 0.29736328125, "learning_rate": 3.5818508135693937e-07, "loss": 0.00029832692234776914, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3997, "train_speed(iter/s)": 0.027414 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 262.5, "completions/mean_length": 88.46875190734863, "completions/min_length": 29.5, "epoch": 5.957557706626955, "grad_norm": 1.3830089532710863, "kl": 0.31787109375, "learning_rate": 3.57958314669488e-07, "loss": 0.06956753134727478, "memory(GiB)": 112.53, "reward": 1.6250000596046448, "reward_std": 0.09750140458345413, "rewards/CineAccuracyORM/mean": 0.6354166865348816, "rewards/CineAccuracyORM/std": 0.49030808359384537, "rewards/Format/mean": 0.9895833432674408, "rewards/Format/std": 0.05103103443980217, "step": 3998, "train_speed(iter/s)": 0.027413 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 214.0, "completions/mean_length": 91.40625190734863, "completions/min_length": 33.5, "epoch": 5.9590469099032015, "grad_norm": 1.3988488035194817, "kl": 0.30029296875, "learning_rate": 3.5773157975939656e-07, "loss": 0.014564458280801773, "memory(GiB)": 112.53, "reward": 1.6145833730697632, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.6145833507180214, "rewards/CineAccuracyORM/std": 0.46742958575487137, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 3999, "train_speed(iter/s)": 0.027412 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.25, "completions/mean_length": 75.20833587646484, "completions/min_length": 29.75, "epoch": 5.960536113179449, "grad_norm": 0.7633011200265373, "kl": 0.3388671875, "learning_rate": 3.575048766773896e-07, "loss": -0.007878290489315987, "memory(GiB)": 112.53, "reward": 1.7395834028720856, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.739583358168602, "rewards/CineAccuracyORM/std": 0.4121886007487774, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4000, "train_speed(iter/s)": 0.02741 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.25, "completions/mean_length": 78.37500381469727, "completions/min_length": 32.75, "epoch": 5.962025316455696, "grad_norm": 0.003018960022289229, "kl": 0.3037109375, "learning_rate": 3.5727820547418517e-07, "loss": 0.0003035839181393385, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4001, "train_speed(iter/s)": 0.027402 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.75, "completions/mean_length": 76.84375381469727, "completions/min_length": 31.25, "epoch": 5.963514519731944, "grad_norm": 0.003897711661421007, "kl": 0.3271484375, "learning_rate": 3.570515662004934e-07, "loss": 0.0003270252491347492, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4002, "train_speed(iter/s)": 0.027403 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.5, "completions/mean_length": 78.22916793823242, "completions/min_length": 33.25, "epoch": 5.9650037230081905, "grad_norm": 0.0032795671786956574, "kl": 0.30859375, "learning_rate": 3.568249589070178e-07, "loss": 0.00030862900894135237, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4003, "train_speed(iter/s)": 0.027404 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.25, "completions/mean_length": 77.92708396911621, "completions/min_length": 35.5, "epoch": 5.966492926284438, "grad_norm": 0.004014676828063825, "kl": 0.3056640625, "learning_rate": 3.56598383644455e-07, "loss": 0.0003056584973819554, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4004, "train_speed(iter/s)": 0.027405 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.0, "completions/mean_length": 94.83333778381348, "completions/min_length": 26.5, "epoch": 5.967982129560685, "grad_norm": 0.003327344473550625, "kl": 0.282958984375, "learning_rate": 3.563718404634936e-07, "loss": 0.00028310978086665273, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4005, "train_speed(iter/s)": 0.027407 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 196.0, "completions/mean_length": 89.32291984558105, "completions/min_length": 40.25, "epoch": 5.969471332836932, "grad_norm": 0.002968733395641424, "kl": 0.288818359375, "learning_rate": 3.561453294148156e-07, "loss": 0.00028917795862071216, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4006, "train_speed(iter/s)": 0.027407 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.75, "completions/mean_length": 84.90625190734863, "completions/min_length": 29.5, "epoch": 5.9709605361131795, "grad_norm": 0.003144932515785366, "kl": 0.30322265625, "learning_rate": 3.55918850549096e-07, "loss": 0.0003033058892469853, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4007, "train_speed(iter/s)": 0.027406 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 250.25, "completions/mean_length": 92.34375286102295, "completions/min_length": 20.0, "epoch": 5.972449739389427, "grad_norm": 1.059317007089932, "kl": 0.3291015625, "learning_rate": 3.5569240391700206e-07, "loss": -0.001330360071733594, "memory(GiB)": 112.53, "reward": 1.697916716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.697916679084301, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4008, "train_speed(iter/s)": 0.027404 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.0, "completions/mean_length": 75.23958587646484, "completions/min_length": 35.5, "epoch": 5.973938942665674, "grad_norm": 0.05417276071433291, "kl": 0.330078125, "learning_rate": 3.5546598956919444e-07, "loss": 0.00032946065766736865, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4009, "train_speed(iter/s)": 0.027407 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 252.5, "completions/mean_length": 93.04166793823242, "completions/min_length": 39.75, "epoch": 5.975428145941921, "grad_norm": 1.130864695221431, "kl": 0.28173828125, "learning_rate": 3.552396075563257e-07, "loss": 0.0046720136888325214, "memory(GiB)": 112.53, "reward": 1.7916666865348816, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.791666679084301, "rewards/CineAccuracyORM/std": 0.2155592292547226, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4010, "train_speed(iter/s)": 0.027409 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.25, "completions/mean_length": 90.75000381469727, "completions/min_length": 28.25, "epoch": 5.976917349218168, "grad_norm": 0.0029129507967389774, "kl": 0.28173828125, "learning_rate": 3.55013257929042e-07, "loss": 0.0002819453948177397, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4011, "train_speed(iter/s)": 0.027411 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.0, "completions/mean_length": 84.47916793823242, "completions/min_length": 31.25, "epoch": 5.978406552494415, "grad_norm": 1.521947047620176, "kl": 0.280517578125, "learning_rate": 3.5478694073798204e-07, "loss": -0.011357543990015984, "memory(GiB)": 112.53, "reward": 1.697916716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.697916679084301, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4012, "train_speed(iter/s)": 0.027413 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 225.0, "completions/mean_length": 86.06250381469727, "completions/min_length": 27.25, "epoch": 5.979895755770663, "grad_norm": 2.1235665036237092, "kl": 0.337890625, "learning_rate": 3.5456065603377694e-07, "loss": 0.01223609410226345, "memory(GiB)": 112.53, "reward": 1.8750000298023224, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.8750000149011612, "rewards/CineAccuracyORM/std": 0.2155592292547226, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4013, "train_speed(iter/s)": 0.027413 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.0, "completions/mean_length": 95.85416793823242, "completions/min_length": 30.0, "epoch": 5.98138495904691, "grad_norm": 0.002925565416029091, "kl": 0.2861328125, "learning_rate": 3.5433440386705115e-07, "loss": 0.00028622569516301155, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4014, "train_speed(iter/s)": 0.027413 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 201.25, "completions/mean_length": 85.69791984558105, "completions/min_length": 31.75, "epoch": 5.982874162323157, "grad_norm": 0.0031200289616721624, "kl": 0.306640625, "learning_rate": 3.54108184288421e-07, "loss": 0.0003064831253141165, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4015, "train_speed(iter/s)": 0.027412 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.5, "completions/mean_length": 88.09375381469727, "completions/min_length": 33.25, "epoch": 5.984363365599404, "grad_norm": 1.014657842308557, "kl": 0.296875, "learning_rate": 3.5388199734849623e-07, "loss": -0.00639651482924819, "memory(GiB)": 112.53, "reward": 1.7812500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7812500074505806, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4016, "train_speed(iter/s)": 0.027409 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.25, "completions/mean_length": 86.19791984558105, "completions/min_length": 39.25, "epoch": 5.985852568875652, "grad_norm": 0.0032333751807453563, "kl": 0.30322265625, "learning_rate": 3.536558430978792e-07, "loss": 0.00030284677632153034, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4017, "train_speed(iter/s)": 0.027411 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.75, "completions/mean_length": 86.42708587646484, "completions/min_length": 36.5, "epoch": 5.987341772151899, "grad_norm": 2.7003008622011118, "kl": 0.41064453125, "learning_rate": 3.5342972158716457e-07, "loss": 0.023612961173057556, "memory(GiB)": 112.53, "reward": 1.8958333432674408, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.17466487362980843, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4018, "train_speed(iter/s)": 0.027411 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 235.75, "completions/mean_length": 89.22916984558105, "completions/min_length": 34.25, "epoch": 5.9888309754281455, "grad_norm": 1.3298981010012496, "kl": 0.29150390625, "learning_rate": 3.532036328669401e-07, "loss": -0.005732033401727676, "memory(GiB)": 112.53, "reward": 1.7812500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7812500074505806, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4019, "train_speed(iter/s)": 0.027411 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.5, "completions/mean_length": 91.88541984558105, "completions/min_length": 39.0, "epoch": 5.990320178704393, "grad_norm": 0.0030072787905190404, "kl": 0.271484375, "learning_rate": 3.5297757698778586e-07, "loss": 0.0002712618443183601, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4020, "train_speed(iter/s)": 0.027412 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 234.0, "completions/mean_length": 92.88541984558105, "completions/min_length": 39.0, "epoch": 5.991809381980641, "grad_norm": 0.0029327461235563014, "kl": 0.281494140625, "learning_rate": 3.5275155400027476e-07, "loss": 0.0002813897735904902, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4021, "train_speed(iter/s)": 0.02741 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 232.5, "completions/mean_length": 88.35416793823242, "completions/min_length": 36.75, "epoch": 5.993298585256888, "grad_norm": 0.021072458393331016, "kl": 0.33544921875, "learning_rate": 3.525255639549727e-07, "loss": 0.00033567551872693, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4022, "train_speed(iter/s)": 0.02741 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 243.5, "completions/mean_length": 96.87500190734863, "completions/min_length": 39.0, "epoch": 5.994787788533134, "grad_norm": 1.3851077968132375, "kl": 0.2646484375, "learning_rate": 3.5229960690243726e-07, "loss": -0.001033032312989235, "memory(GiB)": 112.53, "reward": 1.9270833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9270833432674408, "rewards/CineAccuracyORM/std": 0.11607640981674194, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4023, "train_speed(iter/s)": 0.027411 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.5, "completions/mean_length": 84.21875381469727, "completions/min_length": 29.25, "epoch": 5.996276991809382, "grad_norm": 0.003431013616990317, "kl": 0.32568359375, "learning_rate": 3.5207368289321966e-07, "loss": 0.0003256881609559059, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4024, "train_speed(iter/s)": 0.027408 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.0, "completions/mean_length": 89.02083587646484, "completions/min_length": 35.5, "epoch": 5.997766195085629, "grad_norm": 1.2350257854775404, "kl": 0.3134765625, "learning_rate": 3.5184779197786306e-07, "loss": -0.024016378447413445, "memory(GiB)": 112.53, "reward": 1.5520833432674408, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.5520833358168602, "rewards/CineAccuracyORM/std": 0.20484384894371033, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4025, "train_speed(iter/s)": 0.027405 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 272.0, "completions/mean_length": 90.11458587646484, "completions/min_length": 24.0, "epoch": 5.999255398361877, "grad_norm": 0.002797808234463544, "kl": 0.2880859375, "learning_rate": 3.516219342069035e-07, "loss": 0.0002884482964873314, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4026, "train_speed(iter/s)": 0.027401 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.25, "completions/mean_length": 81.70833587646484, "completions/min_length": 34.75, "epoch": 6.001489203276247, "grad_norm": 0.003017957799089542, "kl": 0.30712890625, "learning_rate": 3.513961096308699e-07, "loss": 0.00030754850013181567, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4027, "train_speed(iter/s)": 0.027401 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.0, "completions/mean_length": 85.14583587646484, "completions/min_length": 30.5, "epoch": 6.0029784065524945, "grad_norm": 1.8908355047527985, "kl": 0.31982421875, "learning_rate": 3.511703183002827e-07, "loss": -0.004605370573699474, "memory(GiB)": 112.53, "reward": 1.9375000298023224, "reward_std": 0.08311938121914864, "rewards/CineAccuracyORM/mean": 0.9375000149011612, "rewards/CineAccuracyORM/std": 0.16575583815574646, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4028, "train_speed(iter/s)": 0.027403 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.0, "completions/mean_length": 94.66666984558105, "completions/min_length": 33.75, "epoch": 6.004467609828741, "grad_norm": 0.0029290516909552355, "kl": 0.275390625, "learning_rate": 3.5094456026565634e-07, "loss": 0.0002749887353274971, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4029, "train_speed(iter/s)": 0.027405 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 208.25, "completions/mean_length": 91.59375381469727, "completions/min_length": 37.75, "epoch": 6.005956813104989, "grad_norm": 0.0029345362434253235, "kl": 0.30126953125, "learning_rate": 3.507188355774965e-07, "loss": 0.0003018892020918429, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4030, "train_speed(iter/s)": 0.027405 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.75, "completions/mean_length": 80.02083587646484, "completions/min_length": 34.75, "epoch": 6.007446016381236, "grad_norm": 0.003090202392886858, "kl": 0.287109375, "learning_rate": 3.504931442863022e-07, "loss": 0.0002873383346013725, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4031, "train_speed(iter/s)": 0.027408 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 229.5, "completions/mean_length": 91.05208587646484, "completions/min_length": 32.0, "epoch": 6.0089352196574835, "grad_norm": 0.0030767478939731684, "kl": 0.29833984375, "learning_rate": 3.50267486442565e-07, "loss": 0.00029852072475478053, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4032, "train_speed(iter/s)": 0.027406 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.25, "completions/mean_length": 84.41666984558105, "completions/min_length": 37.25, "epoch": 6.01042442293373, "grad_norm": 1.0292755759996137, "kl": 0.2919921875, "learning_rate": 3.5004186209676833e-07, "loss": -0.00937992986291647, "memory(GiB)": 112.53, "reward": 1.6250000298023224, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.6250000037252903, "rewards/CineAccuracyORM/std": 0.2155592292547226, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4033, "train_speed(iter/s)": 0.027406 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 285.5, "completions/mean_length": 92.39583587646484, "completions/min_length": 45.0, "epoch": 6.011913626209978, "grad_norm": 1.3555789919717118, "kl": 0.29248046875, "learning_rate": 3.4981627129938895e-07, "loss": -0.0015423046424984932, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833507180214, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4034, "train_speed(iter/s)": 0.027404 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.25, "completions/mean_length": 83.51041984558105, "completions/min_length": 32.25, "epoch": 6.013402829486225, "grad_norm": 0.00318157272205374, "kl": 0.30517578125, "learning_rate": 3.495907141008954e-07, "loss": 0.0003053898108191788, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4035, "train_speed(iter/s)": 0.027402 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.25, "completions/mean_length": 88.61458396911621, "completions/min_length": 33.25, "epoch": 6.014892032762472, "grad_norm": 0.0029980735462472635, "kl": 0.30908203125, "learning_rate": 3.493651905517491e-07, "loss": 0.0003093881532549858, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4036, "train_speed(iter/s)": 0.027398 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.75, "completions/mean_length": 89.65625, "completions/min_length": 28.25, "epoch": 6.016381236038719, "grad_norm": 1.100669839560024, "kl": 0.28173828125, "learning_rate": 3.4913970070240386e-07, "loss": 0.012202868238091469, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833507180214, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4037, "train_speed(iter/s)": 0.027395 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.75, "completions/mean_length": 77.89583587646484, "completions/min_length": 26.0, "epoch": 6.017870439314967, "grad_norm": 0.003086017051138442, "kl": 0.34375, "learning_rate": 3.489142446033059e-07, "loss": 0.0003438584681134671, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4038, "train_speed(iter/s)": 0.027396 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 194.25, "completions/mean_length": 90.63541984558105, "completions/min_length": 39.5, "epoch": 6.019359642591214, "grad_norm": 1.7299140846664225, "kl": 0.3115234375, "learning_rate": 3.4868882230489416e-07, "loss": 0.011502708308398724, "memory(GiB)": 112.53, "reward": 1.8645833730697632, "reward_std": 0.07400632463395596, "rewards/CineAccuracyORM/mean": 0.864583358168602, "rewards/CineAccuracyORM/std": 0.26659026369452477, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4039, "train_speed(iter/s)": 0.027395 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 240.5, "completions/mean_length": 95.68750190734863, "completions/min_length": 35.75, "epoch": 6.0208488458674605, "grad_norm": 0.0031128154302867386, "kl": 0.28466796875, "learning_rate": 3.4846343385759947e-07, "loss": 0.00028449518140405416, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4040, "train_speed(iter/s)": 0.027395 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 254.75, "completions/mean_length": 99.51041984558105, "completions/min_length": 23.5, "epoch": 6.022338049143708, "grad_norm": 0.0029562443002886184, "kl": 0.294189453125, "learning_rate": 3.4823807931184547e-07, "loss": 0.0002936766832135618, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4041, "train_speed(iter/s)": 0.027394 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 191.0, "completions/mean_length": 87.18750190734863, "completions/min_length": 27.5, "epoch": 6.023827252419955, "grad_norm": 0.0030303394277851697, "kl": 0.28515625, "learning_rate": 3.4801275871804847e-07, "loss": 0.00028485545772127807, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4042, "train_speed(iter/s)": 0.027395 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 246.75, "completions/mean_length": 100.67708587646484, "completions/min_length": 37.5, "epoch": 6.025316455696203, "grad_norm": 0.0028004200486807375, "kl": 0.27001953125, "learning_rate": 3.477874721266164e-07, "loss": 0.0002701063349377364, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4043, "train_speed(iter/s)": 0.027395 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 234.0, "completions/mean_length": 95.37500190734863, "completions/min_length": 31.5, "epoch": 6.0268056589724495, "grad_norm": 0.0029739346143924338, "kl": 0.275390625, "learning_rate": 3.475622195879504e-07, "loss": 0.000275248457910493, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4044, "train_speed(iter/s)": 0.027396 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.75, "completions/mean_length": 87.73958778381348, "completions/min_length": 27.0, "epoch": 6.028294862248697, "grad_norm": 0.003367104513282996, "kl": 0.302734375, "learning_rate": 3.4733700115244344e-07, "loss": 0.0003028464561793953, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4045, "train_speed(iter/s)": 0.027396 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 179.5, "completions/mean_length": 75.81250190734863, "completions/min_length": 29.75, "epoch": 6.029784065524944, "grad_norm": 0.006201394280558707, "kl": 0.32763671875, "learning_rate": 3.4711181687048106e-07, "loss": 0.0003280055243521929, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4046, "train_speed(iter/s)": 0.027397 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.25, "completions/mean_length": 93.42708587646484, "completions/min_length": 36.5, "epoch": 6.031273268801192, "grad_norm": 1.860337662552448, "kl": 0.279296875, "learning_rate": 3.468866667924415e-07, "loss": 0.018694400787353516, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.07715167850255966, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.18116392940282822, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4047, "train_speed(iter/s)": 0.027394 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.25, "completions/mean_length": 89.46875190734863, "completions/min_length": 34.5, "epoch": 6.032762472077438, "grad_norm": 0.0031018305946319286, "kl": 0.31298828125, "learning_rate": 3.4666155096869453e-07, "loss": 0.00031373591627925634, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4048, "train_speed(iter/s)": 0.027393 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 220.25, "completions/mean_length": 94.81250381469727, "completions/min_length": 42.5, "epoch": 6.034251675353686, "grad_norm": 1.0403384322959566, "kl": 0.275390625, "learning_rate": 3.4643646944960305e-07, "loss": 0.02265581302344799, "memory(GiB)": 112.53, "reward": 1.8125000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4049, "train_speed(iter/s)": 0.027394 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.5, "completions/mean_length": 91.04166793823242, "completions/min_length": 42.0, "epoch": 6.035740878629933, "grad_norm": 0.00847431755625515, "kl": 0.281494140625, "learning_rate": 3.462114222855218e-07, "loss": 0.000281398999504745, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4050, "train_speed(iter/s)": 0.027394 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.5, "completions/mean_length": 90.08333778381348, "completions/min_length": 33.5, "epoch": 6.037230081906181, "grad_norm": 0.002997540754370139, "kl": 0.29541015625, "learning_rate": 3.4598640952679805e-07, "loss": 0.0002951522183138877, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4051, "train_speed(iter/s)": 0.027391 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 241.25, "completions/mean_length": 94.52083778381348, "completions/min_length": 38.75, "epoch": 6.038719285182427, "grad_norm": 0.002953980624303103, "kl": 0.28125, "learning_rate": 3.4576143122377154e-07, "loss": 0.00028115310124121606, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4052, "train_speed(iter/s)": 0.027391 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 237.5, "completions/mean_length": 91.79166793823242, "completions/min_length": 35.25, "epoch": 6.040208488458674, "grad_norm": 1.0360302211665873, "kl": 0.2724609375, "learning_rate": 3.455364874267737e-07, "loss": 0.008175142109394073, "memory(GiB)": 112.53, "reward": 1.8437500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4053, "train_speed(iter/s)": 0.027391 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.5, "completions/mean_length": 89.17708587646484, "completions/min_length": 26.75, "epoch": 6.041697691734922, "grad_norm": 0.003079508584017553, "kl": 0.2978515625, "learning_rate": 3.453115781861291e-07, "loss": 0.00029817415634170175, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4054, "train_speed(iter/s)": 0.027394 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 263.25, "completions/mean_length": 96.42708396911621, "completions/min_length": 24.5, "epoch": 6.043186895011169, "grad_norm": 0.002969477358724449, "kl": 0.27685546875, "learning_rate": 3.4508670355215355e-07, "loss": 0.0002766248653642833, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4055, "train_speed(iter/s)": 0.027392 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.75, "completions/mean_length": 90.78125190734863, "completions/min_length": 36.0, "epoch": 6.044676098287416, "grad_norm": 0.0032397577620089796, "kl": 0.26220703125, "learning_rate": 3.4486186357515603e-07, "loss": 0.00026196366525255144, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4056, "train_speed(iter/s)": 0.027392 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 248.5, "completions/mean_length": 92.03125381469727, "completions/min_length": 37.5, "epoch": 6.046165301563663, "grad_norm": 0.0045133522424418504, "kl": 0.2939453125, "learning_rate": 3.4463705830543745e-07, "loss": 0.00029370366246439517, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4057, "train_speed(iter/s)": 0.027391 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.75, "completions/mean_length": 88.32291793823242, "completions/min_length": 45.5, "epoch": 6.047654504839911, "grad_norm": 0.0030350022388382983, "kl": 0.291748046875, "learning_rate": 3.444122877932907e-07, "loss": 0.0002917185192927718, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4058, "train_speed(iter/s)": 0.02739 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.0, "completions/mean_length": 90.01041984558105, "completions/min_length": 41.0, "epoch": 6.049143708116158, "grad_norm": 1.1610212419497004, "kl": 0.29638671875, "learning_rate": 3.441875520890012e-07, "loss": 0.001151600619778037, "memory(GiB)": 112.53, "reward": 1.8125000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4059, "train_speed(iter/s)": 0.027388 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.75, "completions/mean_length": 77.54166984558105, "completions/min_length": 32.25, "epoch": 6.050632911392405, "grad_norm": 1.518578546004135, "kl": 0.31005859375, "learning_rate": 3.4396285124284687e-07, "loss": 0.008912025019526482, "memory(GiB)": 112.53, "reward": 1.8125000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4060, "train_speed(iter/s)": 0.027387 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.5, "completions/mean_length": 87.12500190734863, "completions/min_length": 32.0, "epoch": 6.052122114668652, "grad_norm": 1.485289922686314, "kl": 0.41259765625, "learning_rate": 3.437381853050968e-07, "loss": -0.0069908155128359795, "memory(GiB)": 112.53, "reward": 1.729166716337204, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.7291666939854622, "rewards/CineAccuracyORM/std": 0.3428337797522545, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4061, "train_speed(iter/s)": 0.027384 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 237.75, "completions/mean_length": 88.57291793823242, "completions/min_length": 36.5, "epoch": 6.0536113179449, "grad_norm": 0.8536632778345177, "kl": 0.32763671875, "learning_rate": 3.4351355432601336e-07, "loss": -0.013983285054564476, "memory(GiB)": 112.53, "reward": 1.8958333432674408, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.12590253353118896, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4062, "train_speed(iter/s)": 0.027384 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.0, "completions/mean_length": 83.45833587646484, "completions/min_length": 41.5, "epoch": 6.055100521221147, "grad_norm": 0.0030777978865950007, "kl": 0.29931640625, "learning_rate": 3.432889583558505e-07, "loss": 0.00029930929304100573, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4063, "train_speed(iter/s)": 0.027384 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.5, "completions/mean_length": 80.03125190734863, "completions/min_length": 24.0, "epoch": 6.056589724497394, "grad_norm": 2.344893372306611, "kl": 0.3134765625, "learning_rate": 3.4306439744485447e-07, "loss": -0.021208565682172775, "memory(GiB)": 112.53, "reward": 1.8750000298023224, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.8750000149011612, "rewards/CineAccuracyORM/std": 0.2155592292547226, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4064, "train_speed(iter/s)": 0.027385 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 220.0, "completions/mean_length": 88.30208778381348, "completions/min_length": 34.5, "epoch": 6.058078927773641, "grad_norm": 0.0032355020040070815, "kl": 0.2958984375, "learning_rate": 3.428398716432641e-07, "loss": 0.00029583426658064127, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4065, "train_speed(iter/s)": 0.027387 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.75, "completions/mean_length": 80.92708587646484, "completions/min_length": 28.5, "epoch": 6.059568131049888, "grad_norm": 0.003242677625796766, "kl": 0.33935546875, "learning_rate": 3.426153810013094e-07, "loss": 0.0003388503100723028, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4066, "train_speed(iter/s)": 0.027387 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 149.0, "completions/mean_length": 76.09375381469727, "completions/min_length": 36.25, "epoch": 6.061057334326136, "grad_norm": 0.005261573878557195, "kl": 0.32275390625, "learning_rate": 3.4239092556921366e-07, "loss": 0.0003225158725399524, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4067, "train_speed(iter/s)": 0.027385 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.0, "completions/mean_length": 84.94791984558105, "completions/min_length": 36.5, "epoch": 6.062546537602382, "grad_norm": 0.003666511485086634, "kl": 0.30908203125, "learning_rate": 3.4216650539719114e-07, "loss": 0.00030909033375792205, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4068, "train_speed(iter/s)": 0.027386 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.5, "completions/mean_length": 91.54166793823242, "completions/min_length": 37.25, "epoch": 6.06403574087863, "grad_norm": 0.6872461641263845, "kl": 0.30810546875, "learning_rate": 3.4194212053544914e-07, "loss": -0.0019409079104661942, "memory(GiB)": 112.53, "reward": 1.6770834028720856, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.677083358168602, "rewards/CineAccuracyORM/std": 0.47723397612571716, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4069, "train_speed(iter/s)": 0.027388 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.25, "completions/mean_length": 83.98958587646484, "completions/min_length": 30.0, "epoch": 6.065524944154877, "grad_norm": 0.0030807353469601193, "kl": 0.31591796875, "learning_rate": 3.4171777103418676e-07, "loss": 0.00031617016065865755, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4070, "train_speed(iter/s)": 0.027385 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 150.0, "completions/mean_length": 81.10416793823242, "completions/min_length": 38.75, "epoch": 6.0670141474311245, "grad_norm": 0.004062636334127172, "kl": 0.3056640625, "learning_rate": 3.414934569435949e-07, "loss": 0.00030601603793911636, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4071, "train_speed(iter/s)": 0.027386 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 235.75, "completions/mean_length": 83.52083587646484, "completions/min_length": 32.25, "epoch": 6.068503350707371, "grad_norm": 1.5888234394630258, "kl": 0.32470703125, "learning_rate": 3.412691783138572e-07, "loss": 0.00013804998889099807, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4072, "train_speed(iter/s)": 0.027388 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 202.5, "completions/mean_length": 87.75000190734863, "completions/min_length": 27.0, "epoch": 6.069992553983619, "grad_norm": 0.03364111216758868, "kl": 0.31884765625, "learning_rate": 3.410449351951484e-07, "loss": 0.0003189999843016267, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4073, "train_speed(iter/s)": 0.027389 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.5, "completions/mean_length": 89.50000190734863, "completions/min_length": 38.25, "epoch": 6.071481757259866, "grad_norm": 0.0037069650314764564, "kl": 0.288330078125, "learning_rate": 3.4082072763763613e-07, "loss": 0.00028852318064309657, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4074, "train_speed(iter/s)": 0.027388 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.0, "completions/mean_length": 91.89583587646484, "completions/min_length": 36.0, "epoch": 6.0729709605361135, "grad_norm": 0.0032242220476628334, "kl": 0.29638671875, "learning_rate": 3.405965556914799e-07, "loss": 0.000297040882287547, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4075, "train_speed(iter/s)": 0.02739 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.0, "completions/mean_length": 85.90625190734863, "completions/min_length": 25.0, "epoch": 6.07446016381236, "grad_norm": 0.008604243980549028, "kl": 0.30859375, "learning_rate": 3.4037241940683094e-07, "loss": 0.0003087850636802614, "memory(GiB)": 112.53, "reward": 1.5000000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4076, "train_speed(iter/s)": 0.027392 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.75, "completions/mean_length": 87.39583587646484, "completions/min_length": 36.25, "epoch": 6.075949367088608, "grad_norm": 1.654418214491139, "kl": 0.31396484375, "learning_rate": 3.401483188338329e-07, "loss": 0.0003132485435344279, "memory(GiB)": 112.53, "reward": 1.7916666865348816, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.791666679084301, "rewards/CineAccuracyORM/std": 0.2155592292547226, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4077, "train_speed(iter/s)": 0.02739 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 254.75, "completions/mean_length": 93.64583778381348, "completions/min_length": 33.5, "epoch": 6.077438570364855, "grad_norm": 0.776603221810296, "kl": 0.288330078125, "learning_rate": 3.39924254022621e-07, "loss": 0.00028842969913966954, "memory(GiB)": 112.53, "reward": 1.4583333730697632, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.4583333432674408, "rewards/CineAccuracyORM/std": 0.4888460487127304, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4078, "train_speed(iter/s)": 0.027387 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.0, "completions/mean_length": 83.84375286102295, "completions/min_length": 34.5, "epoch": 6.078927773641102, "grad_norm": 0.002866974880666635, "kl": 0.2958984375, "learning_rate": 3.397002250233227e-07, "loss": 0.000295715726679191, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4079, "train_speed(iter/s)": 0.027388 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.25, "completions/mean_length": 80.50000190734863, "completions/min_length": 38.75, "epoch": 6.080416976917349, "grad_norm": 0.028224431510821473, "kl": 0.353515625, "learning_rate": 3.3947623188605777e-07, "loss": 0.0003541444311849773, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4080, "train_speed(iter/s)": 0.027386 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 202.25, "completions/mean_length": 87.55208396911621, "completions/min_length": 36.5, "epoch": 6.081906180193596, "grad_norm": 0.003473885533836045, "kl": 0.3212890625, "learning_rate": 3.392522746609372e-07, "loss": 0.0003212334122508764, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4081, "train_speed(iter/s)": 0.027386 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.75, "completions/mean_length": 86.17708587646484, "completions/min_length": 37.75, "epoch": 6.083395383469844, "grad_norm": 0.00322906864267685, "kl": 0.30810546875, "learning_rate": 3.3902835339806456e-07, "loss": 0.0003075919521506876, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4082, "train_speed(iter/s)": 0.027388 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.0, "completions/mean_length": 80.84375286102295, "completions/min_length": 27.75, "epoch": 6.084884586746091, "grad_norm": 0.003145413099187939, "kl": 0.33349609375, "learning_rate": 3.388044681475353e-07, "loss": 0.0003335996880196035, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4083, "train_speed(iter/s)": 0.027388 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.0, "completions/mean_length": 92.04166984558105, "completions/min_length": 33.25, "epoch": 6.086373790022338, "grad_norm": 0.003327249912935868, "kl": 0.29150390625, "learning_rate": 3.3858061895943647e-07, "loss": 0.00029027258278802037, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4084, "train_speed(iter/s)": 0.027389 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.75, "completions/mean_length": 81.33333587646484, "completions/min_length": 31.5, "epoch": 6.087862993298585, "grad_norm": 0.003093613455104674, "kl": 0.32177734375, "learning_rate": 3.383568058838476e-07, "loss": 0.00032148920581676066, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4085, "train_speed(iter/s)": 0.02739 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.75, "completions/mean_length": 90.02083587646484, "completions/min_length": 32.5, "epoch": 6.089352196574833, "grad_norm": 2.2597911912756365, "kl": 0.30126953125, "learning_rate": 3.381330289708395e-07, "loss": -0.02677382528781891, "memory(GiB)": 112.53, "reward": 1.6145833730697632, "reward_std": 0.11572097800672054, "rewards/CineAccuracyORM/mean": 0.6145833432674408, "rewards/CineAccuracyORM/std": 0.4749870151281357, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4086, "train_speed(iter/s)": 0.027387 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.25, "completions/mean_length": 78.87500190734863, "completions/min_length": 27.5, "epoch": 6.0908413998510795, "grad_norm": 1.0339912980182513, "kl": 0.3193359375, "learning_rate": 3.3790928827047546e-07, "loss": -0.0012519306037575006, "memory(GiB)": 112.53, "reward": 1.8437500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4087, "train_speed(iter/s)": 0.027388 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.5, "completions/mean_length": 89.84375190734863, "completions/min_length": 33.75, "epoch": 6.092330603127327, "grad_norm": 0.0033157119442693383, "kl": 0.262939453125, "learning_rate": 3.376855838328102e-07, "loss": 0.0002631702518556267, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4088, "train_speed(iter/s)": 0.027387 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.0, "completions/mean_length": 81.17708587646484, "completions/min_length": 35.0, "epoch": 6.093819806403574, "grad_norm": 0.003239284661891255, "kl": 0.3271484375, "learning_rate": 3.374619157078908e-07, "loss": 0.0003272031608503312, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4089, "train_speed(iter/s)": 0.027387 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.5, "completions/mean_length": 83.65625190734863, "completions/min_length": 33.5, "epoch": 6.095309009679822, "grad_norm": 0.0029513940124402355, "kl": 0.306640625, "learning_rate": 3.3723828394575584e-07, "loss": 0.00030669558327645063, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4090, "train_speed(iter/s)": 0.027385 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 267.5, "completions/mean_length": 101.20833587646484, "completions/min_length": 32.0, "epoch": 6.0967982129560685, "grad_norm": 0.0029854817954846893, "kl": 0.250244140625, "learning_rate": 3.370146885964358e-07, "loss": 0.00024968525394797325, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4091, "train_speed(iter/s)": 0.027383 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.25, "completions/mean_length": 81.30208587646484, "completions/min_length": 36.25, "epoch": 6.098287416232315, "grad_norm": 0.003307339972425406, "kl": 0.3095703125, "learning_rate": 3.367911297099534e-07, "loss": 0.0003096915897913277, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4092, "train_speed(iter/s)": 0.027384 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 206.25, "completions/mean_length": 89.38542175292969, "completions/min_length": 34.75, "epoch": 6.099776619508563, "grad_norm": 0.002829450004597222, "kl": 0.30029296875, "learning_rate": 3.365676073363225e-07, "loss": 0.00030062347650527954, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4093, "train_speed(iter/s)": 0.027381 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.5, "completions/mean_length": 82.17708587646484, "completions/min_length": 33.5, "epoch": 6.10126582278481, "grad_norm": 1.3432231490160558, "kl": 0.306640625, "learning_rate": 3.3634412152554945e-07, "loss": 0.00026564853033050895, "memory(GiB)": 112.53, "reward": 1.7812500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7812500149011612, "rewards/CineAccuracyORM/std": 0.34448449313640594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4094, "train_speed(iter/s)": 0.02738 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 240.75, "completions/mean_length": 93.48958587646484, "completions/min_length": 36.75, "epoch": 6.1027550260610575, "grad_norm": 0.0030561556925048426, "kl": 0.275634765625, "learning_rate": 3.3612067232763226e-07, "loss": 0.00027570471866056323, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4095, "train_speed(iter/s)": 0.027379 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.25, "completions/mean_length": 74.57291984558105, "completions/min_length": 22.75, "epoch": 6.104244229337304, "grad_norm": 1.5844538965225914, "kl": 0.31982421875, "learning_rate": 3.358972597925604e-07, "loss": 0.015757175162434578, "memory(GiB)": 112.53, "reward": 1.8854166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.20484384894371033, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4096, "train_speed(iter/s)": 0.02738 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.75, "completions/mean_length": 78.83333587646484, "completions/min_length": 19.25, "epoch": 6.105733432613552, "grad_norm": 0.003056299282423682, "kl": 0.3291015625, "learning_rate": 3.356738839703158e-07, "loss": 0.0003290584427304566, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4097, "train_speed(iter/s)": 0.02738 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.5, "completions/mean_length": 86.63542175292969, "completions/min_length": 32.25, "epoch": 6.107222635889799, "grad_norm": 1.4063793199260042, "kl": 0.284423828125, "learning_rate": 3.354505449108712e-07, "loss": 0.009637047536671162, "memory(GiB)": 112.53, "reward": 1.8958333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4098, "train_speed(iter/s)": 0.027379 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.75, "completions/mean_length": 88.19791793823242, "completions/min_length": 35.25, "epoch": 6.108711839166046, "grad_norm": 0.0066203871546482666, "kl": 0.30419921875, "learning_rate": 3.352272426641921e-07, "loss": 0.00030424955184571445, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4099, "train_speed(iter/s)": 0.027379 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 181.0, "completions/mean_length": 82.96875190734863, "completions/min_length": 34.5, "epoch": 6.110201042442293, "grad_norm": 0.0032703221951193495, "kl": 0.32666015625, "learning_rate": 3.3500397728023534e-07, "loss": 0.0003264431143179536, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4100, "train_speed(iter/s)": 0.027378 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.5, "completions/mean_length": 86.33333396911621, "completions/min_length": 34.5, "epoch": 6.111690245718541, "grad_norm": 0.0029746802930158533, "kl": 0.30126953125, "learning_rate": 3.347807488089494e-07, "loss": 0.0003006409096997231, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4101, "train_speed(iter/s)": 0.02738 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.25, "completions/mean_length": 88.50000190734863, "completions/min_length": 35.25, "epoch": 6.113179448994788, "grad_norm": 0.0034570938675912697, "kl": 0.28857421875, "learning_rate": 3.3455755730027475e-07, "loss": 0.0002879712847061455, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4102, "train_speed(iter/s)": 0.027381 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.25, "completions/mean_length": 83.02083587646484, "completions/min_length": 32.0, "epoch": 6.114668652271035, "grad_norm": 1.474793544874887, "kl": 0.3291015625, "learning_rate": 3.3433440280414326e-07, "loss": -0.012812232598662376, "memory(GiB)": 112.53, "reward": 1.8125000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4103, "train_speed(iter/s)": 0.027378 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 220.5, "completions/mean_length": 84.86458587646484, "completions/min_length": 42.5, "epoch": 6.116157855547282, "grad_norm": 1.0744097209919423, "kl": 0.31982421875, "learning_rate": 3.3411128537047884e-07, "loss": 0.006732551380991936, "memory(GiB)": 112.53, "reward": 1.697916716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.697916679084301, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4104, "train_speed(iter/s)": 0.027375 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.75, "completions/mean_length": 80.22916984558105, "completions/min_length": 39.5, "epoch": 6.117647058823529, "grad_norm": 0.0030466494744637567, "kl": 0.32177734375, "learning_rate": 3.3388820504919724e-07, "loss": 0.00032147689489647746, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4105, "train_speed(iter/s)": 0.027374 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 186.25, "completions/mean_length": 76.82291984558105, "completions/min_length": 34.5, "epoch": 6.119136262099777, "grad_norm": 0.0029850208414636263, "kl": 0.31494140625, "learning_rate": 3.3366516189020534e-07, "loss": 0.00031463379855267704, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4106, "train_speed(iter/s)": 0.027372 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.5, "completions/mean_length": 91.58333396911621, "completions/min_length": 42.25, "epoch": 6.1206254653760235, "grad_norm": 0.003411788988514982, "kl": 0.30078125, "learning_rate": 3.334421559434021e-07, "loss": 0.00030087627237662673, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4107, "train_speed(iter/s)": 0.027369 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.75, "completions/mean_length": 86.96875190734863, "completions/min_length": 40.25, "epoch": 6.122114668652271, "grad_norm": 0.0032741237273628404, "kl": 0.27490234375, "learning_rate": 3.332191872586781e-07, "loss": 0.0002751733991317451, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4108, "train_speed(iter/s)": 0.02737 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 186.25, "completions/mean_length": 82.13541984558105, "completions/min_length": 37.75, "epoch": 6.123603871928518, "grad_norm": 1.2291630810608516, "kl": 0.31494140625, "learning_rate": 3.329962558859156e-07, "loss": 0.01813146471977234, "memory(GiB)": 112.53, "reward": 1.635416716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.635416679084301, "rewards/CineAccuracyORM/std": 0.4456155598163605, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4109, "train_speed(iter/s)": 0.02737 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 206.25, "completions/mean_length": 91.22916793823242, "completions/min_length": 33.5, "epoch": 6.125093075204766, "grad_norm": 1.518076952980936, "kl": 0.27880859375, "learning_rate": 3.3277336187498874e-07, "loss": 0.010737970471382141, "memory(GiB)": 112.53, "reward": 1.7916666865348816, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.24807433784008026, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4110, "train_speed(iter/s)": 0.027368 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.75, "completions/mean_length": 75.96875190734863, "completions/min_length": 31.5, "epoch": 6.1265822784810124, "grad_norm": 0.0031132264809298838, "kl": 0.31494140625, "learning_rate": 3.325505052757625e-07, "loss": 0.00031483988277614117, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4111, "train_speed(iter/s)": 0.027367 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.5, "completions/mean_length": 77.96875190734863, "completions/min_length": 31.25, "epoch": 6.12807148175726, "grad_norm": 1.3197084261980652, "kl": 0.31298828125, "learning_rate": 3.3232768613809446e-07, "loss": 0.02322140894830227, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4112, "train_speed(iter/s)": 0.027367 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.0, "completions/mean_length": 80.72916793823242, "completions/min_length": 33.5, "epoch": 6.129560685033507, "grad_norm": 0.003340541115065157, "kl": 0.3203125, "learning_rate": 3.321049045118333e-07, "loss": 0.00032009618007577956, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4113, "train_speed(iter/s)": 0.027367 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 241.5, "completions/mean_length": 84.37500190734863, "completions/min_length": 30.0, "epoch": 6.131049888309755, "grad_norm": 2.190906182473545, "kl": 0.30712890625, "learning_rate": 3.318821604468192e-07, "loss": -0.016932785511016846, "memory(GiB)": 112.53, "reward": 1.635416716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.635416679084301, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4114, "train_speed(iter/s)": 0.027366 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.75, "completions/mean_length": 83.31250381469727, "completions/min_length": 35.75, "epoch": 6.132539091586001, "grad_norm": 1.095837900277428, "kl": 0.328125, "learning_rate": 3.316594539928844e-07, "loss": 0.022663084790110588, "memory(GiB)": 112.53, "reward": 1.8125000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4115, "train_speed(iter/s)": 0.027367 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 184.75, "completions/mean_length": 84.31250190734863, "completions/min_length": 42.75, "epoch": 6.134028294862249, "grad_norm": 0.00345350542352774, "kl": 0.290283203125, "learning_rate": 3.314367851998523e-07, "loss": 0.0002905103610828519, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4116, "train_speed(iter/s)": 0.027369 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.5, "completions/mean_length": 79.06250381469727, "completions/min_length": 28.25, "epoch": 6.135517498138496, "grad_norm": 0.0032155111369340816, "kl": 0.34033203125, "learning_rate": 3.3121415411753804e-07, "loss": 0.0003406807954888791, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4117, "train_speed(iter/s)": 0.027371 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.25, "completions/mean_length": 73.11458587646484, "completions/min_length": 30.0, "epoch": 6.137006701414743, "grad_norm": 0.3136269555268428, "kl": 0.533203125, "learning_rate": 3.3099156079574867e-07, "loss": 0.0005329186678864062, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4118, "train_speed(iter/s)": 0.027371 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.0, "completions/mean_length": 79.47916793823242, "completions/min_length": 37.5, "epoch": 6.13849590469099, "grad_norm": 0.0032067892793878313, "kl": 0.3056640625, "learning_rate": 3.3076900528428197e-07, "loss": 0.0003063240146730095, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4119, "train_speed(iter/s)": 0.027368 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 202.0, "completions/mean_length": 82.58333587646484, "completions/min_length": 31.0, "epoch": 6.139985107967237, "grad_norm": 0.0027126199157826445, "kl": 0.294921875, "learning_rate": 3.3054648763292797e-07, "loss": 0.00029467427521012723, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4120, "train_speed(iter/s)": 0.027366 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.25, "completions/mean_length": 79.42708587646484, "completions/min_length": 34.5, "epoch": 6.141474311243485, "grad_norm": 0.003254176950775464, "kl": 0.31103515625, "learning_rate": 3.3032400789146786e-07, "loss": 0.0003107499796897173, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4121, "train_speed(iter/s)": 0.027367 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.0, "completions/mean_length": 78.69791984558105, "completions/min_length": 33.5, "epoch": 6.142963514519732, "grad_norm": 0.003634034283002015, "kl": 0.3193359375, "learning_rate": 3.301015661096746e-07, "loss": 0.0003196917532477528, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4122, "train_speed(iter/s)": 0.027368 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.5, "completions/mean_length": 76.61458778381348, "completions/min_length": 29.0, "epoch": 6.144452717795979, "grad_norm": 0.00330972863172651, "kl": 0.33544921875, "learning_rate": 3.2987916233731273e-07, "loss": 0.00033543334575369954, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4123, "train_speed(iter/s)": 0.027368 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.0, "completions/mean_length": 76.18750190734863, "completions/min_length": 25.5, "epoch": 6.145941921072226, "grad_norm": 0.0030157736388546127, "kl": 0.2998046875, "learning_rate": 3.296567966241377e-07, "loss": 0.0002996666298713535, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4124, "train_speed(iter/s)": 0.027367 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.0, "completions/mean_length": 76.11458587646484, "completions/min_length": 27.5, "epoch": 6.147431124348474, "grad_norm": 0.003738680096156821, "kl": 0.33544921875, "learning_rate": 3.2943446901989724e-07, "loss": 0.00033519527642056346, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4125, "train_speed(iter/s)": 0.027366 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.75, "completions/mean_length": 79.39583778381348, "completions/min_length": 30.0, "epoch": 6.148920327624721, "grad_norm": 1.3315383509281984, "kl": 0.29931640625, "learning_rate": 3.2921217957432967e-07, "loss": 0.03099413588643074, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4126, "train_speed(iter/s)": 0.027368 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 130.25, "completions/mean_length": 72.29166793823242, "completions/min_length": 28.75, "epoch": 6.150409530900968, "grad_norm": 0.0033872115621751274, "kl": 0.35791015625, "learning_rate": 3.2898992833716563e-07, "loss": 0.0003581349737942219, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4127, "train_speed(iter/s)": 0.027369 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.5, "completions/mean_length": 86.53125381469727, "completions/min_length": 36.25, "epoch": 6.151898734177215, "grad_norm": 0.0033162172196729563, "kl": 0.2861328125, "learning_rate": 3.287677153581269e-07, "loss": 0.00028672724147327244, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4128, "train_speed(iter/s)": 0.027368 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.75, "completions/mean_length": 76.35416984558105, "completions/min_length": 38.0, "epoch": 6.153387937453463, "grad_norm": 1.104604942745973, "kl": 0.33935546875, "learning_rate": 3.285455406869263e-07, "loss": 0.005202563013881445, "memory(GiB)": 112.53, "reward": 1.8437500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4129, "train_speed(iter/s)": 0.027367 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.5, "completions/mean_length": 75.04166984558105, "completions/min_length": 29.75, "epoch": 6.15487714072971, "grad_norm": 0.0037429238200249195, "kl": 0.29638671875, "learning_rate": 3.2832340437326887e-07, "loss": 0.0002968586341012269, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4130, "train_speed(iter/s)": 0.027369 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.5, "completions/mean_length": 82.46875190734863, "completions/min_length": 36.75, "epoch": 6.156366344005956, "grad_norm": 0.003344645315071922, "kl": 0.298828125, "learning_rate": 3.281013064668502e-07, "loss": 0.00029897812055423856, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4131, "train_speed(iter/s)": 0.027367 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.75, "completions/mean_length": 67.13541984558105, "completions/min_length": 25.5, "epoch": 6.157855547282204, "grad_norm": 0.003565280548215958, "kl": 0.37548828125, "learning_rate": 3.278792470173579e-07, "loss": 0.00037575908936560154, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4132, "train_speed(iter/s)": 0.027365 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.75, "completions/mean_length": 76.29166984558105, "completions/min_length": 33.75, "epoch": 6.159344750558451, "grad_norm": 1.9053984470020195, "kl": 0.3154296875, "learning_rate": 3.276572260744709e-07, "loss": -0.004136192612349987, "memory(GiB)": 112.53, "reward": 1.541666716337204, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.541666679084301, "rewards/CineAccuracyORM/std": 0.4888460487127304, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4133, "train_speed(iter/s)": 0.027366 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 184.0, "completions/mean_length": 72.98958492279053, "completions/min_length": 32.0, "epoch": 6.160833953834699, "grad_norm": 0.003467071636634919, "kl": 0.3359375, "learning_rate": 3.274352436878592e-07, "loss": 0.00033638038439676166, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4134, "train_speed(iter/s)": 0.027366 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.5, "completions/mean_length": 68.31250381469727, "completions/min_length": 27.0, "epoch": 6.162323157110945, "grad_norm": 0.00391133396298834, "kl": 0.3388671875, "learning_rate": 3.272132999071846e-07, "loss": 0.0003382685245014727, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4135, "train_speed(iter/s)": 0.027367 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 220.75, "completions/mean_length": 71.73958587646484, "completions/min_length": 28.0, "epoch": 6.163812360387193, "grad_norm": 0.003934507406771909, "kl": 0.37548828125, "learning_rate": 3.269913947820998e-07, "loss": 0.00037492834962904453, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4136, "train_speed(iter/s)": 0.027366 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 139.75, "completions/mean_length": 71.41666793823242, "completions/min_length": 26.0, "epoch": 6.16530156366344, "grad_norm": 2.054161773961991, "kl": 0.34130859375, "learning_rate": 3.2676952836224914e-07, "loss": 0.017464715987443924, "memory(GiB)": 112.53, "reward": 1.760416716337204, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.760416679084301, "rewards/CineAccuracyORM/std": 0.3182126581668854, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4137, "train_speed(iter/s)": 0.027366 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.5, "completions/mean_length": 80.30208587646484, "completions/min_length": 34.75, "epoch": 6.1667907669396875, "grad_norm": 0.9641382572737374, "kl": 0.30859375, "learning_rate": 3.265477006972685e-07, "loss": -0.020397599786520004, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4138, "train_speed(iter/s)": 0.027368 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 195.75, "completions/mean_length": 82.27083778381348, "completions/min_length": 31.5, "epoch": 6.168279970215934, "grad_norm": 0.00428741627560777, "kl": 0.3095703125, "learning_rate": 3.2632591183678446e-07, "loss": 0.0003096828586421907, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4139, "train_speed(iter/s)": 0.027367 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.25, "completions/mean_length": 72.42708587646484, "completions/min_length": 35.5, "epoch": 6.169769173492182, "grad_norm": 0.003599821999173131, "kl": 0.34326171875, "learning_rate": 3.261041618304157e-07, "loss": 0.0003427740593906492, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4140, "train_speed(iter/s)": 0.027363 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.25, "completions/mean_length": 74.35416793823242, "completions/min_length": 34.5, "epoch": 6.171258376768429, "grad_norm": 0.9362464159272955, "kl": 0.33154296875, "learning_rate": 3.258824507277714e-07, "loss": 0.005971639417111874, "memory(GiB)": 112.53, "reward": 1.822916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4141, "train_speed(iter/s)": 0.027361 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.5, "completions/mean_length": 80.04166793823242, "completions/min_length": 22.5, "epoch": 6.1727475800446765, "grad_norm": 0.002990308218247187, "kl": 0.3251953125, "learning_rate": 3.2566077857845265e-07, "loss": 0.00032476504566147923, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4142, "train_speed(iter/s)": 0.027362 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 199.0, "completions/mean_length": 78.58333778381348, "completions/min_length": 25.0, "epoch": 6.174236783320923, "grad_norm": 0.003393908693184944, "kl": 0.3046875, "learning_rate": 3.2543914543205187e-07, "loss": 0.0003051672247238457, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4143, "train_speed(iter/s)": 0.027363 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.0, "completions/mean_length": 69.62500286102295, "completions/min_length": 30.5, "epoch": 6.17572598659717, "grad_norm": 0.0038068144744840275, "kl": 0.3173828125, "learning_rate": 3.252175513381521e-07, "loss": 0.00031707569723948836, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4144, "train_speed(iter/s)": 0.027365 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 125.0, "completions/mean_length": 67.73958587646484, "completions/min_length": 30.0, "epoch": 6.177215189873418, "grad_norm": 1.7443384777928643, "kl": 0.33935546875, "learning_rate": 3.2499599634632826e-07, "loss": 0.015717757865786552, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4145, "train_speed(iter/s)": 0.027365 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.75, "completions/mean_length": 72.63541793823242, "completions/min_length": 31.75, "epoch": 6.178704393149665, "grad_norm": 0.003335246102484236, "kl": 0.34521484375, "learning_rate": 3.247744805061462e-07, "loss": 0.00034500076435506344, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4146, "train_speed(iter/s)": 0.027363 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.0, "completions/mean_length": 75.96875190734863, "completions/min_length": 26.75, "epoch": 6.180193596425912, "grad_norm": 0.003475921375596632, "kl": 0.31396484375, "learning_rate": 3.2455300386716313e-07, "loss": 0.0003141057095490396, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4147, "train_speed(iter/s)": 0.027362 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.0, "completions/mean_length": 71.19791984558105, "completions/min_length": 26.25, "epoch": 6.181682799702159, "grad_norm": 1.9569213534605703, "kl": 0.3369140625, "learning_rate": 3.243315664789278e-07, "loss": 0.005270741879940033, "memory(GiB)": 112.53, "reward": 1.4687500596046448, "reward_std": 0.07400632463395596, "rewards/CineAccuracyORM/mean": 0.4687500176951289, "rewards/CineAccuracyORM/std": 0.38697611913084984, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4148, "train_speed(iter/s)": 0.027359 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.5, "completions/mean_length": 72.67708683013916, "completions/min_length": 25.75, "epoch": 6.183172002978407, "grad_norm": 0.003368254805366234, "kl": 0.3525390625, "learning_rate": 3.241101683909794e-07, "loss": 0.0003519640304148197, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4149, "train_speed(iter/s)": 0.027361 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.0, "completions/mean_length": 71.98958587646484, "completions/min_length": 26.5, "epoch": 6.1846612062546535, "grad_norm": 0.0038413601654392624, "kl": 0.32568359375, "learning_rate": 3.2388880965284915e-07, "loss": 0.00032541967811994255, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4150, "train_speed(iter/s)": 0.027361 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.0, "completions/mean_length": 71.64583492279053, "completions/min_length": 29.0, "epoch": 6.186150409530901, "grad_norm": 0.0036779536282970943, "kl": 0.35546875, "learning_rate": 3.2366749031405873e-07, "loss": 0.00035573513014242053, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4151, "train_speed(iter/s)": 0.027362 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 204.5, "completions/mean_length": 79.96875190734863, "completions/min_length": 29.5, "epoch": 6.187639612807148, "grad_norm": 1.5867674211884948, "kl": 0.2978515625, "learning_rate": 3.2344621042412164e-07, "loss": -0.006215314380824566, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4152, "train_speed(iter/s)": 0.02736 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 153.0, "completions/mean_length": 75.20833587646484, "completions/min_length": 30.75, "epoch": 6.189128816083396, "grad_norm": 0.003091273065201769, "kl": 0.3330078125, "learning_rate": 3.2322497003254235e-07, "loss": 0.00033304045791737735, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4153, "train_speed(iter/s)": 0.027361 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.0, "completions/mean_length": 85.12500381469727, "completions/min_length": 33.75, "epoch": 6.1906180193596425, "grad_norm": 0.597293457155356, "kl": 0.30419921875, "learning_rate": 3.230037691888162e-07, "loss": 0.010794605128467083, "memory(GiB)": 112.53, "reward": 1.6562500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500074505806, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4154, "train_speed(iter/s)": 0.027358 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.5, "completions/mean_length": 78.51041984558105, "completions/min_length": 36.5, "epoch": 6.19210722263589, "grad_norm": 2.0255451021223116, "kl": 0.33056640625, "learning_rate": 3.2278260794243026e-07, "loss": -0.01704687811434269, "memory(GiB)": 112.53, "reward": 1.7187500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500149011612, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4155, "train_speed(iter/s)": 0.02736 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.0, "completions/mean_length": 75.83333492279053, "completions/min_length": 29.0, "epoch": 6.193596425912137, "grad_norm": 0.002904785172211812, "kl": 0.322265625, "learning_rate": 3.225614863428621e-07, "loss": 0.0003225566470064223, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4156, "train_speed(iter/s)": 0.027361 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 196.25, "completions/mean_length": 83.34375190734863, "completions/min_length": 36.5, "epoch": 6.195085629188384, "grad_norm": 0.003282386062452377, "kl": 0.29931640625, "learning_rate": 3.2234040443958077e-07, "loss": 0.0002989826607517898, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4157, "train_speed(iter/s)": 0.027358 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.75, "completions/mean_length": 79.48958587646484, "completions/min_length": 24.75, "epoch": 6.1965748324646315, "grad_norm": 0.7706232570240711, "kl": 0.310546875, "learning_rate": 3.221193622820465e-07, "loss": -0.012461218982934952, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.822916679084301, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4158, "train_speed(iter/s)": 0.027357 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.0, "completions/mean_length": 70.94792079925537, "completions/min_length": 30.0, "epoch": 6.198064035740878, "grad_norm": 2.3259119556301044, "kl": 0.49609375, "learning_rate": 3.2189835991971046e-07, "loss": -0.03620590269565582, "memory(GiB)": 112.53, "reward": 1.6354166865348816, "reward_std": 0.07400632463395596, "rewards/CineAccuracyORM/mean": 0.6354166679084301, "rewards/CineAccuracyORM/std": 0.21880721300840378, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4159, "train_speed(iter/s)": 0.027359 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.5, "completions/mean_length": 78.75000190734863, "completions/min_length": 31.75, "epoch": 6.199553239017126, "grad_norm": 0.0036871118378820717, "kl": 0.32666015625, "learning_rate": 3.2167739740201516e-07, "loss": 0.00032663121237419546, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4160, "train_speed(iter/s)": 0.02736 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 202.5, "completions/mean_length": 78.95833587646484, "completions/min_length": 32.0, "epoch": 6.201042442293373, "grad_norm": 0.0034619247522930294, "kl": 0.3125, "learning_rate": 3.214564747783937e-07, "loss": 0.0003128411481156945, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4161, "train_speed(iter/s)": 0.02736 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.75, "completions/mean_length": 82.00000190734863, "completions/min_length": 33.75, "epoch": 6.2025316455696204, "grad_norm": 0.0035190767564189265, "kl": 0.30322265625, "learning_rate": 3.2123559209827057e-07, "loss": 0.0003031257074326277, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4162, "train_speed(iter/s)": 0.027359 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.25, "completions/mean_length": 77.90625286102295, "completions/min_length": 33.0, "epoch": 6.204020848845867, "grad_norm": 0.003524683493717299, "kl": 0.30029296875, "learning_rate": 3.2101474941106176e-07, "loss": 0.0003002595331054181, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4163, "train_speed(iter/s)": 0.027361 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.25, "completions/mean_length": 85.71875381469727, "completions/min_length": 40.25, "epoch": 6.205510052122115, "grad_norm": 1.6673859725843652, "kl": 0.2978515625, "learning_rate": 3.2079394676617346e-07, "loss": -0.012353580445051193, "memory(GiB)": 112.53, "reward": 1.604166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.604166679084301, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4164, "train_speed(iter/s)": 0.027362 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.25, "completions/mean_length": 72.18750190734863, "completions/min_length": 25.75, "epoch": 6.206999255398362, "grad_norm": 0.847698936000448, "kl": 0.30419921875, "learning_rate": 3.205731842130034e-07, "loss": -0.019539572298526764, "memory(GiB)": 112.53, "reward": 1.729166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7291666865348816, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4165, "train_speed(iter/s)": 0.027361 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.0, "completions/mean_length": 76.39583587646484, "completions/min_length": 29.75, "epoch": 6.208488458674609, "grad_norm": 0.003779270881264659, "kl": 0.328125, "learning_rate": 3.203524618009403e-07, "loss": 0.00032781908521428704, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4166, "train_speed(iter/s)": 0.027361 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.5, "completions/mean_length": 77.41666793823242, "completions/min_length": 29.0, "epoch": 6.209977661950856, "grad_norm": 0.004001030582975481, "kl": 0.3115234375, "learning_rate": 3.2013177957936375e-07, "loss": 0.0003116994339507073, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4167, "train_speed(iter/s)": 0.027361 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.25, "completions/mean_length": 81.89583587646484, "completions/min_length": 38.0, "epoch": 6.211466865227104, "grad_norm": 0.1449861302462376, "kl": 0.35498046875, "learning_rate": 3.1991113759764485e-07, "loss": 0.00035436026519164443, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4168, "train_speed(iter/s)": 0.027361 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.75, "completions/mean_length": 75.54166793823242, "completions/min_length": 28.5, "epoch": 6.212956068503351, "grad_norm": 1.5824206972213326, "kl": 0.328125, "learning_rate": 3.196905359051448e-07, "loss": 0.004352196119725704, "memory(GiB)": 112.53, "reward": 1.96875, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.96875, "rewards/CineAccuracyORM/std": 0.08445799350738525, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4169, "train_speed(iter/s)": 0.02736 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.75, "completions/mean_length": 83.20833587646484, "completions/min_length": 36.0, "epoch": 6.2144452717795975, "grad_norm": 0.0033469388649164404, "kl": 0.31298828125, "learning_rate": 3.1946997455121654e-07, "loss": 0.0003128355019725859, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4170, "train_speed(iter/s)": 0.027358 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 152.0, "completions/mean_length": 75.02083587646484, "completions/min_length": 25.0, "epoch": 6.215934475055845, "grad_norm": 0.0035583589505895105, "kl": 0.3056640625, "learning_rate": 3.1924945358520376e-07, "loss": 0.00030562104075215757, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4171, "train_speed(iter/s)": 0.02736 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 229.75, "completions/mean_length": 83.28125190734863, "completions/min_length": 33.75, "epoch": 6.217423678332092, "grad_norm": 0.004402291112116203, "kl": 0.302734375, "learning_rate": 3.1902897305644093e-07, "loss": 0.0003025862970389426, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4172, "train_speed(iter/s)": 0.027362 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.75, "completions/mean_length": 79.34375190734863, "completions/min_length": 29.25, "epoch": 6.21891288160834, "grad_norm": 0.0033012796528153433, "kl": 0.3076171875, "learning_rate": 3.1880853301425404e-07, "loss": 0.00030745501862838864, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4173, "train_speed(iter/s)": 0.027363 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.25, "completions/mean_length": 78.50000381469727, "completions/min_length": 23.5, "epoch": 6.2204020848845865, "grad_norm": 0.6299643525527826, "kl": 0.32275390625, "learning_rate": 3.18588133507959e-07, "loss": -0.016569796949625015, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4174, "train_speed(iter/s)": 0.027364 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.0, "completions/mean_length": 80.78125381469727, "completions/min_length": 37.75, "epoch": 6.221891288160834, "grad_norm": 0.0032279678106256113, "kl": 0.30419921875, "learning_rate": 3.183677745868636e-07, "loss": 0.0003034953260794282, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4175, "train_speed(iter/s)": 0.027363 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.75, "completions/mean_length": 81.19791793823242, "completions/min_length": 35.5, "epoch": 6.223380491437081, "grad_norm": 0.0035726720874632036, "kl": 0.31689453125, "learning_rate": 3.181474563002664e-07, "loss": 0.00031672680051997304, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4176, "train_speed(iter/s)": 0.027364 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.25, "completions/mean_length": 84.16666793823242, "completions/min_length": 41.5, "epoch": 6.224869694713329, "grad_norm": 0.022079980966568893, "kl": 0.3154296875, "learning_rate": 3.179271786974563e-07, "loss": 0.00031494791619479656, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4177, "train_speed(iter/s)": 0.027366 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 179.5, "completions/mean_length": 75.77083587646484, "completions/min_length": 26.0, "epoch": 6.226358897989575, "grad_norm": 0.0032047490402907688, "kl": 0.3173828125, "learning_rate": 3.1770694182771384e-07, "loss": 0.0003176969476044178, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4178, "train_speed(iter/s)": 0.027368 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.0, "completions/mean_length": 77.63541984558105, "completions/min_length": 37.0, "epoch": 6.227848101265823, "grad_norm": 1.5299164847499003, "kl": 0.31884765625, "learning_rate": 3.174867457403098e-07, "loss": 0.011645236983895302, "memory(GiB)": 112.53, "reward": 1.6458333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6458333432674408, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4179, "train_speed(iter/s)": 0.027368 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.0, "completions/mean_length": 82.58333587646484, "completions/min_length": 35.25, "epoch": 6.22933730454207, "grad_norm": 0.005826664291061756, "kl": 0.3173828125, "learning_rate": 3.1726659048450623e-07, "loss": 0.0003173158329445869, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4180, "train_speed(iter/s)": 0.027368 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.0, "completions/mean_length": 81.12500381469727, "completions/min_length": 26.25, "epoch": 6.230826507818318, "grad_norm": 0.0033137172705299213, "kl": 0.30029296875, "learning_rate": 3.1704647610955617e-07, "loss": 0.00029967655427753925, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4181, "train_speed(iter/s)": 0.027369 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.75, "completions/mean_length": 81.81250190734863, "completions/min_length": 34.25, "epoch": 6.232315711094564, "grad_norm": 0.0032680438068439822, "kl": 0.2958984375, "learning_rate": 3.1682640266470284e-07, "loss": 0.00029573822394013405, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4182, "train_speed(iter/s)": 0.027369 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.5, "completions/mean_length": 84.03125190734863, "completions/min_length": 35.25, "epoch": 6.233804914370811, "grad_norm": 2.0135318736044283, "kl": 0.29443359375, "learning_rate": 3.166063701991811e-07, "loss": -0.0006900216685608029, "memory(GiB)": 112.53, "reward": 1.6458333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6458333432674408, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4183, "train_speed(iter/s)": 0.027369 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.75, "completions/mean_length": 80.76041984558105, "completions/min_length": 34.25, "epoch": 6.235294117647059, "grad_norm": 0.002996793688129868, "kl": 0.31591796875, "learning_rate": 3.163863787622162e-07, "loss": 0.00031570508144795895, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4184, "train_speed(iter/s)": 0.027369 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.25, "completions/mean_length": 80.92708587646484, "completions/min_length": 33.25, "epoch": 6.236783320923306, "grad_norm": 0.10715801018309544, "kl": 0.34765625, "learning_rate": 3.161664284030243e-07, "loss": 0.0003484422923065722, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4185, "train_speed(iter/s)": 0.02737 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.5, "completions/mean_length": 84.12500190734863, "completions/min_length": 25.5, "epoch": 6.238272524199553, "grad_norm": 0.0030196853777852265, "kl": 0.30126953125, "learning_rate": 3.159465191708125e-07, "loss": 0.00030128876096569, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4186, "train_speed(iter/s)": 0.02737 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.75, "completions/mean_length": 82.96875190734863, "completions/min_length": 28.75, "epoch": 6.2397617274758, "grad_norm": 0.0031125019598806465, "kl": 0.32666015625, "learning_rate": 3.1572665111477827e-07, "loss": 0.0003266194835305214, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4187, "train_speed(iter/s)": 0.027371 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 194.5, "completions/mean_length": 83.81250381469727, "completions/min_length": 37.75, "epoch": 6.241250930752048, "grad_norm": 1.3491032543353851, "kl": 0.28369140625, "learning_rate": 3.155068242841106e-07, "loss": 0.009687565267086029, "memory(GiB)": 112.53, "reward": 1.8854166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.12724433839321136, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4188, "train_speed(iter/s)": 0.02737 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 279.25, "completions/mean_length": 85.94791793823242, "completions/min_length": 29.75, "epoch": 6.242740134028295, "grad_norm": 1.3535740087043489, "kl": 0.28369140625, "learning_rate": 3.1528703872798836e-07, "loss": -0.03091488778591156, "memory(GiB)": 112.53, "reward": 1.8541666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4189, "train_speed(iter/s)": 0.027366 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.0, "completions/mean_length": 85.06250381469727, "completions/min_length": 34.5, "epoch": 6.244229337304542, "grad_norm": 0.0032615386989733467, "kl": 0.31201171875, "learning_rate": 3.150672944955818e-07, "loss": 0.00031211902387440205, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4190, "train_speed(iter/s)": 0.027364 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.5, "completions/mean_length": 90.55208396911621, "completions/min_length": 43.75, "epoch": 6.245718540580789, "grad_norm": 0.002746025494036675, "kl": 0.28076171875, "learning_rate": 3.14847591636052e-07, "loss": 0.0002810674486681819, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4191, "train_speed(iter/s)": 0.027361 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.0, "completions/mean_length": 86.37500190734863, "completions/min_length": 32.0, "epoch": 6.247207743857037, "grad_norm": 0.00870816236543939, "kl": 0.32275390625, "learning_rate": 3.1462793019855036e-07, "loss": 0.00032250123331323266, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4192, "train_speed(iter/s)": 0.027361 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.0, "completions/mean_length": 82.53125381469727, "completions/min_length": 37.25, "epoch": 6.248696947133284, "grad_norm": 0.003197489000474889, "kl": 0.30908203125, "learning_rate": 3.1440831023221946e-07, "loss": 0.00030856288503855467, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4193, "train_speed(iter/s)": 0.02736 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.75, "completions/mean_length": 85.83333778381348, "completions/min_length": 31.0, "epoch": 6.250186150409531, "grad_norm": 1.5369455626400783, "kl": 0.30908203125, "learning_rate": 3.14188731786192e-07, "loss": -0.0011979378759860992, "memory(GiB)": 112.53, "reward": 1.8541666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4194, "train_speed(iter/s)": 0.027358 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.0, "completions/mean_length": 82.92708778381348, "completions/min_length": 31.5, "epoch": 6.251675353685778, "grad_norm": 1.1727188698312956, "kl": 0.3095703125, "learning_rate": 3.1396919490959193e-07, "loss": -0.003759274957701564, "memory(GiB)": 112.53, "reward": 1.6458334028720856, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6458333507180214, "rewards/CineAccuracyORM/std": 0.43174003064632416, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4195, "train_speed(iter/s)": 0.02736 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.5, "completions/mean_length": 83.11458587646484, "completions/min_length": 32.0, "epoch": 6.253164556962025, "grad_norm": 0.0029943888463753432, "kl": 0.32568359375, "learning_rate": 3.1374969965153386e-07, "loss": 0.00032535241916775703, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4196, "train_speed(iter/s)": 0.027359 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 208.5, "completions/mean_length": 90.31250190734863, "completions/min_length": 41.5, "epoch": 6.254653760238273, "grad_norm": 0.003268383534086908, "kl": 0.30126953125, "learning_rate": 3.1353024606112266e-07, "loss": 0.00030091367079876363, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4197, "train_speed(iter/s)": 0.027359 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 234.75, "completions/mean_length": 90.97916984558105, "completions/min_length": 34.75, "epoch": 6.256142963514519, "grad_norm": 0.004665870740894607, "kl": 0.31005859375, "learning_rate": 3.133108341874546e-07, "loss": 0.0003099565510638058, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4198, "train_speed(iter/s)": 0.027357 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.25, "completions/mean_length": 86.13541793823242, "completions/min_length": 34.75, "epoch": 6.257632166790767, "grad_norm": 0.8033871821565994, "kl": 1.341796875, "learning_rate": 3.130914640796156e-07, "loss": 0.0013383881887421012, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4199, "train_speed(iter/s)": 0.027357 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.75, "completions/mean_length": 77.96875381469727, "completions/min_length": 34.75, "epoch": 6.259121370067014, "grad_norm": 0.003380394778907631, "kl": 0.31787109375, "learning_rate": 3.1287213578668326e-07, "loss": 0.0003174251178279519, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4200, "train_speed(iter/s)": 0.027359 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 208.75, "completions/mean_length": 80.76041793823242, "completions/min_length": 34.75, "epoch": 6.2606105733432615, "grad_norm": 0.0034973575630565905, "kl": 0.32666015625, "learning_rate": 3.126528493577254e-07, "loss": 0.0003262861573603004, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4201, "train_speed(iter/s)": 0.02736 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 243.0, "completions/mean_length": 83.78125381469727, "completions/min_length": 32.75, "epoch": 6.262099776619508, "grad_norm": 0.00300567012587139, "kl": 0.298828125, "learning_rate": 3.124336048418001e-07, "loss": 0.0002987695042975247, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4202, "train_speed(iter/s)": 0.027358 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.0, "completions/mean_length": 90.11458587646484, "completions/min_length": 32.5, "epoch": 6.263588979895756, "grad_norm": 1.5797965268820255, "kl": 0.27392578125, "learning_rate": 3.1221440228795684e-07, "loss": -0.01767587661743164, "memory(GiB)": 112.53, "reward": 1.7916666865348816, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.791666679084301, "rewards/CineAccuracyORM/std": 0.2155592292547226, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4203, "train_speed(iter/s)": 0.02736 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.0, "completions/mean_length": 84.27083587646484, "completions/min_length": 33.5, "epoch": 6.265078183172003, "grad_norm": 0.014870030104532353, "kl": 0.32666015625, "learning_rate": 3.119952417452348e-07, "loss": 0.00032644462771713734, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4204, "train_speed(iter/s)": 0.027359 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.75, "completions/mean_length": 84.12500190734863, "completions/min_length": 31.5, "epoch": 6.2665673864482505, "grad_norm": 0.003056922521696815, "kl": 0.2900390625, "learning_rate": 3.117761232626648e-07, "loss": 0.0002900071267504245, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4205, "train_speed(iter/s)": 0.027359 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.5, "completions/mean_length": 81.11458778381348, "completions/min_length": 31.5, "epoch": 6.268056589724497, "grad_norm": 0.003389207022269735, "kl": 0.32958984375, "learning_rate": 3.1155704688926754e-07, "loss": 0.00033018295653164387, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4206, "train_speed(iter/s)": 0.027354 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.75, "completions/mean_length": 79.20833587646484, "completions/min_length": 35.25, "epoch": 6.269545793000745, "grad_norm": 0.0033842769940079236, "kl": 0.33642578125, "learning_rate": 3.1133801267405423e-07, "loss": 0.0003363440919201821, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4207, "train_speed(iter/s)": 0.027355 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.0, "completions/mean_length": 84.97916984558105, "completions/min_length": 40.75, "epoch": 6.271034996276992, "grad_norm": 0.0043702670135049556, "kl": 0.3203125, "learning_rate": 3.1111902066602724e-07, "loss": 0.0003198208869434893, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4208, "train_speed(iter/s)": 0.027356 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 253.25, "completions/mean_length": 86.19791793823242, "completions/min_length": 34.5, "epoch": 6.272524199553239, "grad_norm": 1.0834709394945665, "kl": 0.296875, "learning_rate": 3.109000709141788e-07, "loss": 0.0068313954398036, "memory(GiB)": 112.53, "reward": 1.8854166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.20484384894371033, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4209, "train_speed(iter/s)": 0.027357 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.5, "completions/mean_length": 82.80208587646484, "completions/min_length": 33.25, "epoch": 6.274013402829486, "grad_norm": 1.1209647085874601, "kl": 0.30615234375, "learning_rate": 3.1068116346749216e-07, "loss": 0.007702210918068886, "memory(GiB)": 112.53, "reward": 1.7708333432674408, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333358168602, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4210, "train_speed(iter/s)": 0.027354 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.0, "completions/mean_length": 75.19791984558105, "completions/min_length": 32.5, "epoch": 6.275502606105733, "grad_norm": 0.003526226969106988, "kl": 0.314453125, "learning_rate": 3.104622983749412e-07, "loss": 0.00031451787799596786, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4211, "train_speed(iter/s)": 0.027356 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.25, "completions/mean_length": 78.64583778381348, "completions/min_length": 37.75, "epoch": 6.276991809381981, "grad_norm": 0.0038568061755650577, "kl": 0.31005859375, "learning_rate": 3.102434756854898e-07, "loss": 0.0003088417579419911, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4212, "train_speed(iter/s)": 0.027357 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.0, "completions/mean_length": 84.86458778381348, "completions/min_length": 33.5, "epoch": 6.2784810126582276, "grad_norm": 0.0033253081134328013, "kl": 0.30078125, "learning_rate": 3.10024695448093e-07, "loss": 0.0003006121260114014, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4213, "train_speed(iter/s)": 0.027356 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.75, "completions/mean_length": 79.82291984558105, "completions/min_length": 35.5, "epoch": 6.279970215934475, "grad_norm": 1.614649827078257, "kl": 0.310546875, "learning_rate": 3.098059577116954e-07, "loss": -0.002400902798399329, "memory(GiB)": 112.53, "reward": 1.8437500596046448, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.2946811020374298, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4214, "train_speed(iter/s)": 0.027359 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.0, "completions/mean_length": 79.06250190734863, "completions/min_length": 42.25, "epoch": 6.281459419210722, "grad_norm": 0.003282101082475218, "kl": 0.31640625, "learning_rate": 3.0958726252523314e-07, "loss": 0.000316449673846364, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4215, "train_speed(iter/s)": 0.027359 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.75, "completions/mean_length": 84.92708778381348, "completions/min_length": 34.5, "epoch": 6.28294862248697, "grad_norm": 1.2101587059982528, "kl": 0.31298828125, "learning_rate": 3.0936860993763245e-07, "loss": 0.0403696671128273, "memory(GiB)": 112.53, "reward": 1.8125000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4216, "train_speed(iter/s)": 0.027359 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.5, "completions/mean_length": 83.92708396911621, "completions/min_length": 31.5, "epoch": 6.2844378257632165, "grad_norm": 0.00305246387608406, "kl": 0.31640625, "learning_rate": 3.0914999999780966e-07, "loss": 0.0003160227497573942, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4217, "train_speed(iter/s)": 0.027358 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.75, "completions/mean_length": 90.05208587646484, "completions/min_length": 36.25, "epoch": 6.285927029039464, "grad_norm": 0.0032750285510754176, "kl": 0.30810546875, "learning_rate": 3.0893143275467227e-07, "loss": 0.00030832085758447647, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4218, "train_speed(iter/s)": 0.027358 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.0, "completions/mean_length": 89.27083587646484, "completions/min_length": 29.5, "epoch": 6.287416232315711, "grad_norm": 2.2461255727402376, "kl": 0.30712890625, "learning_rate": 3.087129082571173e-07, "loss": -0.015980979427695274, "memory(GiB)": 112.53, "reward": 1.8437500298023224, "reward_std": 0.0876726396381855, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.2224177122116089, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4219, "train_speed(iter/s)": 0.02736 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 214.5, "completions/mean_length": 89.42708587646484, "completions/min_length": 32.0, "epoch": 6.288905435591959, "grad_norm": 0.0034492887127973134, "kl": 0.298828125, "learning_rate": 3.0849442655403313e-07, "loss": 0.00029873286257497966, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4220, "train_speed(iter/s)": 0.02736 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 162.5, "completions/mean_length": 77.25000381469727, "completions/min_length": 30.75, "epoch": 6.2903946388682055, "grad_norm": 0.0034258020048025836, "kl": 0.3095703125, "learning_rate": 3.082759876942981e-07, "loss": 0.0003094755229540169, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4221, "train_speed(iter/s)": 0.027363 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.75, "completions/mean_length": 87.77083587646484, "completions/min_length": 40.0, "epoch": 6.291883842144452, "grad_norm": 0.003148038643845675, "kl": 0.2939453125, "learning_rate": 3.080575917267809e-07, "loss": 0.00029476662166416645, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4222, "train_speed(iter/s)": 0.02736 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 234.25, "completions/mean_length": 83.77083587646484, "completions/min_length": 34.0, "epoch": 6.2933730454207, "grad_norm": 0.044348655590061216, "kl": 0.3310546875, "learning_rate": 3.078392387003409e-07, "loss": 0.00033075554529204965, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4223, "train_speed(iter/s)": 0.027359 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.75, "completions/mean_length": 82.23958587646484, "completions/min_length": 32.75, "epoch": 6.294862248696947, "grad_norm": 0.005045841401846255, "kl": 0.32568359375, "learning_rate": 3.0762092866382785e-07, "loss": 0.0003253839968238026, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4224, "train_speed(iter/s)": 0.027357 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 233.25, "completions/mean_length": 86.15625381469727, "completions/min_length": 40.0, "epoch": 6.2963514519731945, "grad_norm": 0.0031080972215229465, "kl": 0.30322265625, "learning_rate": 3.074026616660814e-07, "loss": 0.0003033754474017769, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4225, "train_speed(iter/s)": 0.027354 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.75, "completions/mean_length": 81.08333587646484, "completions/min_length": 32.25, "epoch": 6.297840655249441, "grad_norm": 0.0035326871656972962, "kl": 0.34423828125, "learning_rate": 3.0718443775593225e-07, "loss": 0.00034391030203551054, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4226, "train_speed(iter/s)": 0.027353 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.0, "completions/mean_length": 79.32291984558105, "completions/min_length": 33.75, "epoch": 6.299329858525689, "grad_norm": 0.0030366348882252888, "kl": 0.3173828125, "learning_rate": 3.0696625698220085e-07, "loss": 0.0003177006437908858, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4227, "train_speed(iter/s)": 0.027354 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 223.5, "completions/mean_length": 85.86458587646484, "completions/min_length": 40.25, "epoch": 6.300819061801936, "grad_norm": 2.055319657122777, "kl": 0.2841796875, "learning_rate": 3.067481193936984e-07, "loss": -0.0040437919087708, "memory(GiB)": 112.53, "reward": 1.96875, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.96875, "rewards/CineAccuracyORM/std": 0.08445799350738525, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4228, "train_speed(iter/s)": 0.027354 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 220.25, "completions/mean_length": 83.50000190734863, "completions/min_length": 31.75, "epoch": 6.302308265078183, "grad_norm": 0.003611724650230225, "kl": 0.306640625, "learning_rate": 3.065300250392265e-07, "loss": 0.00030635562143288553, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4229, "train_speed(iter/s)": 0.027354 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 143.75, "completions/mean_length": 78.58333778381348, "completions/min_length": 39.75, "epoch": 6.30379746835443, "grad_norm": 1.1632986450963139, "kl": 0.3291015625, "learning_rate": 3.0631197396757665e-07, "loss": 0.0025070374831557274, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833507180214, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4230, "train_speed(iter/s)": 0.027356 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.75, "completions/mean_length": 90.01041984558105, "completions/min_length": 39.75, "epoch": 6.305286671630678, "grad_norm": 0.002965657084099642, "kl": 0.291259765625, "learning_rate": 3.060939662275312e-07, "loss": 0.0002915235818363726, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4231, "train_speed(iter/s)": 0.027353 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.0, "completions/mean_length": 84.51041793823242, "completions/min_length": 44.75, "epoch": 6.306775874906925, "grad_norm": 0.00368461509215954, "kl": 0.322265625, "learning_rate": 3.0587600186786216e-07, "loss": 0.00032263281173072755, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4232, "train_speed(iter/s)": 0.027356 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 155.25, "completions/mean_length": 84.97917175292969, "completions/min_length": 42.25, "epoch": 6.308265078183172, "grad_norm": 0.0032317519499780755, "kl": 0.3193359375, "learning_rate": 3.0565808093733235e-07, "loss": 0.0003191315627191216, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4233, "train_speed(iter/s)": 0.027356 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 179.75, "completions/mean_length": 82.97916793823242, "completions/min_length": 38.0, "epoch": 6.309754281459419, "grad_norm": 0.943416472228371, "kl": 0.33447265625, "learning_rate": 3.0544020348469497e-07, "loss": 0.00678927032276988, "memory(GiB)": 112.53, "reward": 1.7916666865348816, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.791666679084301, "rewards/CineAccuracyORM/std": 0.2155592292547226, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4234, "train_speed(iter/s)": 0.027359 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 225.75, "completions/mean_length": 82.27083396911621, "completions/min_length": 31.75, "epoch": 6.311243484735666, "grad_norm": 1.6467439618172, "kl": 0.306640625, "learning_rate": 3.052223695586929e-07, "loss": -0.03278684616088867, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833507180214, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4235, "train_speed(iter/s)": 0.02736 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.5, "completions/mean_length": 85.11458587646484, "completions/min_length": 35.25, "epoch": 6.312732688011914, "grad_norm": 0.0034711840024447796, "kl": 0.2998046875, "learning_rate": 3.0500457920806e-07, "loss": 0.00030054114176891744, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4236, "train_speed(iter/s)": 0.027359 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.0, "completions/mean_length": 77.34375190734863, "completions/min_length": 38.75, "epoch": 6.3142218912881605, "grad_norm": 0.003386469936608415, "kl": 0.32373046875, "learning_rate": 3.0478683248151956e-07, "loss": 0.00032444531098008156, "memory(GiB)": 112.53, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4237, "train_speed(iter/s)": 0.02736 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.75, "completions/mean_length": 84.95833587646484, "completions/min_length": 35.25, "epoch": 6.315711094564408, "grad_norm": 2.0631736142317627, "kl": 0.30126953125, "learning_rate": 3.045691294277858e-07, "loss": -0.0009391861967742443, "memory(GiB)": 112.53, "reward": 1.697916716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.697916679084301, "rewards/CineAccuracyORM/std": 0.34448449313640594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4238, "train_speed(iter/s)": 0.027359 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.25, "completions/mean_length": 78.06250381469727, "completions/min_length": 36.25, "epoch": 6.317200297840655, "grad_norm": 0.004143124281727642, "kl": 0.313232421875, "learning_rate": 3.0435147009556303e-07, "loss": 0.0003136267187073827, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4239, "train_speed(iter/s)": 0.027361 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.75, "completions/mean_length": 83.04166889190674, "completions/min_length": 36.0, "epoch": 6.318689501116903, "grad_norm": 0.0030584202253769064, "kl": 0.32666015625, "learning_rate": 3.0413385453354543e-07, "loss": 0.00032664946047589183, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4240, "train_speed(iter/s)": 0.027361 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.5, "completions/mean_length": 83.41666793823242, "completions/min_length": 28.75, "epoch": 6.320178704393149, "grad_norm": 0.0033304353004496015, "kl": 0.2998046875, "learning_rate": 3.039162827904179e-07, "loss": 0.00029962940607219934, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4241, "train_speed(iter/s)": 0.027363 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.75, "completions/mean_length": 86.77083587646484, "completions/min_length": 37.0, "epoch": 6.321667907669397, "grad_norm": 0.003400534337642696, "kl": 0.302734375, "learning_rate": 3.0369875491485507e-07, "loss": 0.00030291886650957167, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4242, "train_speed(iter/s)": 0.027362 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 181.25, "completions/mean_length": 88.26042175292969, "completions/min_length": 41.75, "epoch": 6.323157110945644, "grad_norm": 0.013875619432133413, "kl": 0.29638671875, "learning_rate": 3.0348127095552205e-07, "loss": 0.0002965055173262954, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4243, "train_speed(iter/s)": 0.027363 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.5, "completions/mean_length": 84.93750190734863, "completions/min_length": 32.75, "epoch": 6.324646314221892, "grad_norm": 0.8056961155154004, "kl": 0.2919921875, "learning_rate": 3.0326383096107423e-07, "loss": 0.014751373790204525, "memory(GiB)": 112.53, "reward": 1.7604166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.760416679084301, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4244, "train_speed(iter/s)": 0.027363 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.5, "completions/mean_length": 83.77083778381348, "completions/min_length": 34.5, "epoch": 6.326135517498138, "grad_norm": 1.493037076053883, "kl": 0.298828125, "learning_rate": 3.030464349801566e-07, "loss": -0.013471974991261959, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833507180214, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4245, "train_speed(iter/s)": 0.027365 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.75, "completions/mean_length": 84.03125381469727, "completions/min_length": 36.0, "epoch": 6.327624720774386, "grad_norm": 0.003913493172764764, "kl": 0.304931640625, "learning_rate": 3.028290830614049e-07, "loss": 0.00030525849433615804, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4246, "train_speed(iter/s)": 0.027365 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.5, "completions/mean_length": 90.85416793823242, "completions/min_length": 35.0, "epoch": 6.329113924050633, "grad_norm": 0.0028367688884698245, "kl": 0.27880859375, "learning_rate": 3.0261177525344455e-07, "loss": 0.0002786841941997409, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4247, "train_speed(iter/s)": 0.027365 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.0, "completions/mean_length": 95.625, "completions/min_length": 40.25, "epoch": 6.33060312732688, "grad_norm": 0.002869979012458255, "kl": 0.275146484375, "learning_rate": 3.023945116048916e-07, "loss": 0.00027514185057953, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4248, "train_speed(iter/s)": 0.027365 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.25, "completions/mean_length": 85.63541793823242, "completions/min_length": 31.0, "epoch": 6.332092330603127, "grad_norm": 0.010632812520127, "kl": 0.30908203125, "learning_rate": 3.0217729216435207e-07, "loss": 0.00030963384779170156, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4249, "train_speed(iter/s)": 0.027366 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.25, "completions/mean_length": 80.27083587646484, "completions/min_length": 36.75, "epoch": 6.333581533879374, "grad_norm": 1.2116628393454878, "kl": 0.3349609375, "learning_rate": 3.0196011698042156e-07, "loss": -0.005140307825058699, "memory(GiB)": 112.53, "reward": 1.7916666865348816, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.24807433784008026, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4250, "train_speed(iter/s)": 0.027367 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.75, "completions/mean_length": 82.71875381469727, "completions/min_length": 36.0, "epoch": 6.335070737155622, "grad_norm": 0.9468181942431172, "kl": 0.32470703125, "learning_rate": 3.017429861016867e-07, "loss": -0.011457445099949837, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.822916679084301, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4251, "train_speed(iter/s)": 0.027367 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.0, "completions/mean_length": 83.89583778381348, "completions/min_length": 33.5, "epoch": 6.336559940431869, "grad_norm": 0.002998898352087007, "kl": 0.3046875, "learning_rate": 3.015258995767231e-07, "loss": 0.00030491355573758483, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4252, "train_speed(iter/s)": 0.027369 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.5, "completions/mean_length": 82.84375190734863, "completions/min_length": 28.5, "epoch": 6.338049143708116, "grad_norm": 0.006304063955824642, "kl": 0.337890625, "learning_rate": 3.013088574540974e-07, "loss": 0.00033820662065409124, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4253, "train_speed(iter/s)": 0.027368 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.25, "completions/mean_length": 83.79166984558105, "completions/min_length": 33.25, "epoch": 6.339538346984363, "grad_norm": 0.7303759697635583, "kl": 0.3310546875, "learning_rate": 3.01091859782366e-07, "loss": -0.01883811503648758, "memory(GiB)": 112.53, "reward": 1.6562500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4254, "train_speed(iter/s)": 0.027367 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.75, "completions/mean_length": 82.77083587646484, "completions/min_length": 37.5, "epoch": 6.341027550260611, "grad_norm": 0.0032550060868993145, "kl": 0.29443359375, "learning_rate": 3.008749066100751e-07, "loss": 0.0002939740370493382, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4255, "train_speed(iter/s)": 0.027366 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 248.0, "completions/mean_length": 92.82291793823242, "completions/min_length": 36.25, "epoch": 6.342516753536858, "grad_norm": 0.003219900188279598, "kl": 0.2958984375, "learning_rate": 3.006579979857614e-07, "loss": 0.00029606895986944437, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4256, "train_speed(iter/s)": 0.027366 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.0, "completions/mean_length": 89.66666793823242, "completions/min_length": 41.0, "epoch": 6.344005956813105, "grad_norm": 0.003084803301309906, "kl": 0.28173828125, "learning_rate": 3.0044113395795105e-07, "loss": 0.0002819101791828871, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4257, "train_speed(iter/s)": 0.027366 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.25, "completions/mean_length": 92.69791984558105, "completions/min_length": 34.25, "epoch": 6.345495160089352, "grad_norm": 0.003315648540525505, "kl": 0.2666015625, "learning_rate": 3.0022431457516073e-07, "loss": 0.00026671215891838074, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4258, "train_speed(iter/s)": 0.027365 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.75, "completions/mean_length": 83.92708587646484, "completions/min_length": 38.75, "epoch": 6.3469843633656, "grad_norm": 0.003301048602284178, "kl": 0.32373046875, "learning_rate": 3.000075398858971e-07, "loss": 0.000323923013638705, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4259, "train_speed(iter/s)": 0.027367 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 233.0, "completions/mean_length": 100.28125190734863, "completions/min_length": 46.5, "epoch": 6.348473566641847, "grad_norm": 0.0030520734693020063, "kl": 0.27783203125, "learning_rate": 2.997908099386565e-07, "loss": 0.0002772454754449427, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4260, "train_speed(iter/s)": 0.027369 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.0, "completions/mean_length": 93.05208587646484, "completions/min_length": 39.75, "epoch": 6.349962769918093, "grad_norm": 0.0032553441157393173, "kl": 0.29833984375, "learning_rate": 2.995741247819256e-07, "loss": 0.0002991541987285018, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4261, "train_speed(iter/s)": 0.027368 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 292.5, "completions/mean_length": 96.15625381469727, "completions/min_length": 35.5, "epoch": 6.351451973194341, "grad_norm": 0.002686308952986066, "kl": 0.279296875, "learning_rate": 2.9935748446418065e-07, "loss": 0.00027932669036090374, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4262, "train_speed(iter/s)": 0.027365 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.5, "completions/mean_length": 88.01041793823242, "completions/min_length": 39.0, "epoch": 6.352941176470588, "grad_norm": 2.945477428876461, "kl": 0.30615234375, "learning_rate": 2.991408890338881e-07, "loss": 0.015381403267383575, "memory(GiB)": 112.53, "reward": 1.8125000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4263, "train_speed(iter/s)": 0.027362 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 179.75, "completions/mean_length": 90.92708587646484, "completions/min_length": 36.25, "epoch": 6.3544303797468356, "grad_norm": 0.8619803719784295, "kl": 0.29736328125, "learning_rate": 2.989243385395048e-07, "loss": -0.005816787015646696, "memory(GiB)": 112.53, "reward": 1.8854166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.20484384894371033, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4264, "train_speed(iter/s)": 0.027359 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.0, "completions/mean_length": 90.43750190734863, "completions/min_length": 41.25, "epoch": 6.355919583023082, "grad_norm": 0.0029671230266139514, "kl": 0.27685546875, "learning_rate": 2.987078330294767e-07, "loss": 0.00027620623586699367, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4265, "train_speed(iter/s)": 0.027358 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.0, "completions/mean_length": 85.57291984558105, "completions/min_length": 44.75, "epoch": 6.35740878629933, "grad_norm": 0.003369870763110832, "kl": 0.30810546875, "learning_rate": 2.984913725522401e-07, "loss": 0.0003086069191340357, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4266, "train_speed(iter/s)": 0.027359 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 194.5, "completions/mean_length": 91.50000381469727, "completions/min_length": 41.75, "epoch": 6.358897989575577, "grad_norm": 1.9917612074526487, "kl": 0.35205078125, "learning_rate": 2.982749571562214e-07, "loss": -0.009297939948737621, "memory(GiB)": 112.53, "reward": 1.8958333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4267, "train_speed(iter/s)": 0.027359 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.5, "completions/mean_length": 93.98958587646484, "completions/min_length": 40.5, "epoch": 6.3603871928518245, "grad_norm": 0.0029187245712249117, "kl": 0.2958984375, "learning_rate": 2.9805858688983656e-07, "loss": 0.00029589247424155474, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4268, "train_speed(iter/s)": 0.027357 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.75, "completions/mean_length": 89.64583587646484, "completions/min_length": 35.25, "epoch": 6.361876396128071, "grad_norm": 0.0032455684943690664, "kl": 0.28564453125, "learning_rate": 2.97842261801492e-07, "loss": 0.00028571151779033244, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4269, "train_speed(iter/s)": 0.027356 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.0, "completions/mean_length": 87.77083587646484, "completions/min_length": 33.0, "epoch": 6.363365599404319, "grad_norm": 0.0032198997774069258, "kl": 0.28173828125, "learning_rate": 2.9762598193958313e-07, "loss": 0.0002812196617014706, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4270, "train_speed(iter/s)": 0.027358 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 185.25, "completions/mean_length": 97.15625190734863, "completions/min_length": 41.75, "epoch": 6.364854802680566, "grad_norm": 0.003260394192848177, "kl": 0.270263671875, "learning_rate": 2.9740974735249625e-07, "loss": 0.0002700603799894452, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4271, "train_speed(iter/s)": 0.02736 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 249.75, "completions/mean_length": 101.92708587646484, "completions/min_length": 43.25, "epoch": 6.3663440059568135, "grad_norm": 0.0028771142156187885, "kl": 0.2802734375, "learning_rate": 2.971935580886066e-07, "loss": 0.00028045516228303313, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4272, "train_speed(iter/s)": 0.027358 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.75, "completions/mean_length": 86.58333587646484, "completions/min_length": 32.75, "epoch": 6.36783320923306, "grad_norm": 1.023553551224481, "kl": 0.31494140625, "learning_rate": 2.9697741419627997e-07, "loss": 0.013133665546774864, "memory(GiB)": 112.53, "reward": 1.90625, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.12363383919000626, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4273, "train_speed(iter/s)": 0.027357 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 195.75, "completions/mean_length": 91.00000190734863, "completions/min_length": 42.5, "epoch": 6.369322412509307, "grad_norm": 0.0035496189232483814, "kl": 0.28369140625, "learning_rate": 2.96761315723872e-07, "loss": 0.0002833728212863207, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4274, "train_speed(iter/s)": 0.027355 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 235.75, "completions/mean_length": 90.78125190734863, "completions/min_length": 37.25, "epoch": 6.370811615785555, "grad_norm": 1.1403624519441093, "kl": 0.2900390625, "learning_rate": 2.965452627197274e-07, "loss": 0.00372404046356678, "memory(GiB)": 112.53, "reward": 1.791666716337204, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7916666865348816, "rewards/CineAccuracyORM/std": 0.33594508469104767, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4275, "train_speed(iter/s)": 0.027354 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.75, "completions/mean_length": 85.15625381469727, "completions/min_length": 37.0, "epoch": 6.372300819061802, "grad_norm": 1.327990703983741, "kl": 0.2900390625, "learning_rate": 2.9632925523218153e-07, "loss": -0.018388543277978897, "memory(GiB)": 112.53, "reward": 1.4270833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.4270833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4276, "train_speed(iter/s)": 0.027354 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.25, "completions/mean_length": 88.70833587646484, "completions/min_length": 41.0, "epoch": 6.373790022338049, "grad_norm": 0.00333781799856174, "kl": 0.28955078125, "learning_rate": 2.9611329330955947e-07, "loss": 0.00028916748124174774, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4277, "train_speed(iter/s)": 0.027354 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 214.0, "completions/mean_length": 101.21875381469727, "completions/min_length": 35.5, "epoch": 6.375279225614296, "grad_norm": 1.9396958226267211, "kl": 0.260986328125, "learning_rate": 2.9589737700017545e-07, "loss": -0.00031266361474990845, "memory(GiB)": 112.53, "reward": 1.8854166865348816, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.21429424732923508, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4278, "train_speed(iter/s)": 0.027354 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 208.5, "completions/mean_length": 92.01041984558105, "completions/min_length": 42.5, "epoch": 6.376768428890544, "grad_norm": 1.0727147804809174, "kl": 0.30517578125, "learning_rate": 2.956815063523343e-07, "loss": -0.004074291791766882, "memory(GiB)": 112.53, "reward": 1.7916666865348816, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.24807433784008026, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4279, "train_speed(iter/s)": 0.027353 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 208.75, "completions/mean_length": 100.10416984558105, "completions/min_length": 32.5, "epoch": 6.3782576321667905, "grad_norm": 1.0255630843021428, "kl": 0.30712890625, "learning_rate": 2.9546568141433e-07, "loss": 0.0062685273587703705, "memory(GiB)": 112.53, "reward": 1.7291666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.729166679084301, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4280, "train_speed(iter/s)": 0.027353 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 236.5, "completions/mean_length": 96.35416984558105, "completions/min_length": 41.0, "epoch": 6.379746835443038, "grad_norm": 0.0030892215392978113, "kl": 0.275390625, "learning_rate": 2.952499022344469e-07, "loss": 0.00027572346152737737, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4281, "train_speed(iter/s)": 0.027351 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 184.5, "completions/mean_length": 93.29166793823242, "completions/min_length": 33.25, "epoch": 6.381236038719285, "grad_norm": 0.9267712049789484, "kl": 0.28857421875, "learning_rate": 2.950341688609588e-07, "loss": 0.0018567496445029974, "memory(GiB)": 112.53, "reward": 1.635416716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.635416679084301, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4282, "train_speed(iter/s)": 0.027351 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 254.75, "completions/mean_length": 93.85416984558105, "completions/min_length": 38.25, "epoch": 6.382725241995533, "grad_norm": 0.00298259317138968, "kl": 0.30615234375, "learning_rate": 2.9481848134212893e-07, "loss": 0.0003056366113014519, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4283, "train_speed(iter/s)": 0.02735 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.5, "completions/mean_length": 92.85416984558105, "completions/min_length": 39.5, "epoch": 6.3842144452717795, "grad_norm": 0.8152730840401512, "kl": 0.31103515625, "learning_rate": 2.946028397262109e-07, "loss": -0.016491256654262543, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833386108279, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4284, "train_speed(iter/s)": 0.027349 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 269.0, "completions/mean_length": 96.02083587646484, "completions/min_length": 34.75, "epoch": 6.385703648548027, "grad_norm": 0.0027399180974443937, "kl": 0.2890625, "learning_rate": 2.9438724406144746e-07, "loss": 0.0002888574090320617, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4285, "train_speed(iter/s)": 0.027347 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.25, "completions/mean_length": 87.28125190734863, "completions/min_length": 36.5, "epoch": 6.387192851824274, "grad_norm": 1.3289295579988107, "kl": 0.3056640625, "learning_rate": 2.941716943960716e-07, "loss": -0.014492439106106758, "memory(GiB)": 112.53, "reward": 1.9375, "reward_std": 0.0725918859243393, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.11058146506547928, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4286, "train_speed(iter/s)": 0.027347 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.75, "completions/mean_length": 85.20833587646484, "completions/min_length": 36.75, "epoch": 6.388682055100521, "grad_norm": 0.004149840431093428, "kl": 0.31201171875, "learning_rate": 2.9395619077830593e-07, "loss": 0.0003114745195489377, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4287, "train_speed(iter/s)": 0.027346 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 199.5, "completions/mean_length": 91.98958587646484, "completions/min_length": 36.25, "epoch": 6.3901712583767685, "grad_norm": 0.0033119991306477573, "kl": 0.298828125, "learning_rate": 2.937407332563621e-07, "loss": 0.00029884520336054265, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4288, "train_speed(iter/s)": 0.027346 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 204.75, "completions/mean_length": 99.37500381469727, "completions/min_length": 47.25, "epoch": 6.391660461653015, "grad_norm": 0.0031956110537745587, "kl": 0.278564453125, "learning_rate": 2.935253218784425e-07, "loss": 0.0002786174009088427, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4289, "train_speed(iter/s)": 0.027346 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.25, "completions/mean_length": 97.89583587646484, "completions/min_length": 43.75, "epoch": 6.393149664929263, "grad_norm": 0.00337534002518866, "kl": 0.28173828125, "learning_rate": 2.933099566927381e-07, "loss": 0.00028146966360509396, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4290, "train_speed(iter/s)": 0.027346 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.75, "completions/mean_length": 91.82291984558105, "completions/min_length": 44.25, "epoch": 6.39463886820551, "grad_norm": 1.295547842161163, "kl": 0.30810546875, "learning_rate": 2.930946377474304e-07, "loss": -0.00869026780128479, "memory(GiB)": 112.53, "reward": 1.8125000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4291, "train_speed(iter/s)": 0.027344 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 236.25, "completions/mean_length": 93.86458587646484, "completions/min_length": 35.5, "epoch": 6.396128071481757, "grad_norm": 0.007761022802224902, "kl": 0.28173828125, "learning_rate": 2.928793650906903e-07, "loss": 0.0002817896893247962, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4292, "train_speed(iter/s)": 0.027343 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.25, "completions/mean_length": 88.47916984558105, "completions/min_length": 40.75, "epoch": 6.397617274758004, "grad_norm": 1.1737011578922518, "kl": 0.32763671875, "learning_rate": 2.9266413877067817e-07, "loss": 0.006379236932843924, "memory(GiB)": 112.53, "reward": 1.7812500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7812500149011612, "rewards/CineAccuracyORM/std": 0.34448449313640594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4293, "train_speed(iter/s)": 0.027341 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 263.75, "completions/mean_length": 98.32291984558105, "completions/min_length": 42.25, "epoch": 6.399106478034252, "grad_norm": 0.0030613696694067704, "kl": 0.28564453125, "learning_rate": 2.9244895883554436e-07, "loss": 0.00028574152383953333, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4294, "train_speed(iter/s)": 0.027341 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 244.5, "completions/mean_length": 100.93750381469727, "completions/min_length": 41.25, "epoch": 6.400595681310499, "grad_norm": 2.598419231800212, "kl": 0.31201171875, "learning_rate": 2.9223382533342824e-07, "loss": -0.019575875252485275, "memory(GiB)": 112.53, "reward": 1.7812500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7812500149011612, "rewards/CineAccuracyORM/std": 0.34448449313640594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4295, "train_speed(iter/s)": 0.02734 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 184.5, "completions/mean_length": 92.11458587646484, "completions/min_length": 42.75, "epoch": 6.402084884586746, "grad_norm": 0.5909749696203638, "kl": 0.2900390625, "learning_rate": 2.9201873831245925e-07, "loss": 0.01513106282800436, "memory(GiB)": 112.53, "reward": 1.59375, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.59375, "rewards/CineAccuracyORM/std": 0.12363383919000626, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4296, "train_speed(iter/s)": 0.027337 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.25, "completions/mean_length": 91.82291984558105, "completions/min_length": 38.25, "epoch": 6.403574087862993, "grad_norm": 0.9967219454072226, "kl": 0.2822265625, "learning_rate": 2.918036978207565e-07, "loss": 0.016142258420586586, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4297, "train_speed(iter/s)": 0.027336 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.25, "completions/mean_length": 90.64583778381348, "completions/min_length": 41.5, "epoch": 6.405063291139241, "grad_norm": 0.0031599005626748145, "kl": 0.28466796875, "learning_rate": 2.9158870390642863e-07, "loss": 0.00028393184766173363, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4298, "train_speed(iter/s)": 0.027333 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 208.5, "completions/mean_length": 90.02083396911621, "completions/min_length": 38.75, "epoch": 6.406552494415488, "grad_norm": 0.0036817469752159077, "kl": 0.306640625, "learning_rate": 2.9137375661757354e-07, "loss": 0.00030622599297203124, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4299, "train_speed(iter/s)": 0.027331 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.5, "completions/mean_length": 94.52083587646484, "completions/min_length": 32.0, "epoch": 6.4080416976917345, "grad_norm": 0.0029427908711599057, "kl": 0.3056640625, "learning_rate": 2.9115885600227873e-07, "loss": 0.000305810768622905, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4300, "train_speed(iter/s)": 0.02733 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.75, "completions/mean_length": 93.35416984558105, "completions/min_length": 30.5, "epoch": 6.409530900967982, "grad_norm": 0.01762566440028661, "kl": 0.27392578125, "learning_rate": 2.90944002108622e-07, "loss": 0.0002740386698860675, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4301, "train_speed(iter/s)": 0.02733 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.0, "completions/mean_length": 99.04166984558105, "completions/min_length": 26.5, "epoch": 6.411020104244229, "grad_norm": 0.005033636526833601, "kl": 0.28271484375, "learning_rate": 2.9072919498466987e-07, "loss": 0.0002829396107699722, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4302, "train_speed(iter/s)": 0.02733 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.5, "completions/mean_length": 96.15625190734863, "completions/min_length": 41.0, "epoch": 6.412509307520477, "grad_norm": 0.003227651507198233, "kl": 0.28515625, "learning_rate": 2.9051443467847865e-07, "loss": 0.00028486776864156127, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4303, "train_speed(iter/s)": 0.02733 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.5, "completions/mean_length": 93.39583587646484, "completions/min_length": 43.5, "epoch": 6.4139985107967235, "grad_norm": 1.7103493145428599, "kl": 0.298828125, "learning_rate": 2.9029972123809423e-07, "loss": 0.015980521216988564, "memory(GiB)": 112.53, "reward": 1.6458333730697632, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.6458333432674408, "rewards/CineAccuracyORM/std": 0.48803938925266266, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4304, "train_speed(iter/s)": 0.027331 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.5, "completions/mean_length": 91.34375190734863, "completions/min_length": 44.75, "epoch": 6.415487714072971, "grad_norm": 0.003336932306158035, "kl": 0.29296875, "learning_rate": 2.9008505471155196e-07, "loss": 0.00029274163534864783, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4305, "train_speed(iter/s)": 0.027329 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 201.0, "completions/mean_length": 95.57291984558105, "completions/min_length": 43.25, "epoch": 6.416976917349218, "grad_norm": 1.397769774140099, "kl": 0.298828125, "learning_rate": 2.898704351468766e-07, "loss": 0.0011761782225221395, "memory(GiB)": 112.53, "reward": 1.7291667461395264, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7291666865348816, "rewards/CineAccuracyORM/std": 0.43174003064632416, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4306, "train_speed(iter/s)": 0.027331 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.75, "completions/mean_length": 92.69791984558105, "completions/min_length": 40.75, "epoch": 6.418466120625466, "grad_norm": 0.004475768143673274, "kl": 0.30517578125, "learning_rate": 2.896558625920829e-07, "loss": 0.0003052910033147782, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4307, "train_speed(iter/s)": 0.027331 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.25, "completions/mean_length": 88.72916984558105, "completions/min_length": 42.5, "epoch": 6.419955323901712, "grad_norm": 0.003634545375000141, "kl": 0.298583984375, "learning_rate": 2.894413370951745e-07, "loss": 0.0002987865300383419, "memory(GiB)": 112.53, "reward": 1.3333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.3333333432674408, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4308, "train_speed(iter/s)": 0.027331 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 201.25, "completions/mean_length": 99.35416793823242, "completions/min_length": 42.5, "epoch": 6.42144452717796, "grad_norm": 0.5878409688083616, "kl": 0.2880859375, "learning_rate": 2.892268587041447e-07, "loss": -0.011136703193187714, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833507180214, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4309, "train_speed(iter/s)": 0.02733 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 236.0, "completions/mean_length": 97.31250190734863, "completions/min_length": 41.75, "epoch": 6.422933730454207, "grad_norm": 0.003176845918457832, "kl": 0.2822265625, "learning_rate": 2.8901242746697634e-07, "loss": 0.0002821183006744832, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4310, "train_speed(iter/s)": 0.027329 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 231.25, "completions/mean_length": 93.50000381469727, "completions/min_length": 37.25, "epoch": 6.424422933730455, "grad_norm": 0.003375145521150394, "kl": 0.291015625, "learning_rate": 2.8879804343164147e-07, "loss": 0.00029112104675732553, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4311, "train_speed(iter/s)": 0.027329 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 223.0, "completions/mean_length": 98.67708587646484, "completions/min_length": 37.5, "epoch": 6.425912137006701, "grad_norm": 0.8849504170774275, "kl": 0.2900390625, "learning_rate": 2.8858370664610223e-07, "loss": -0.01809527538716793, "memory(GiB)": 112.53, "reward": 1.8958333432674408, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.17466487362980843, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4312, "train_speed(iter/s)": 0.027327 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.75, "completions/mean_length": 100.58333587646484, "completions/min_length": 45.0, "epoch": 6.427401340282948, "grad_norm": 1.4535068467976657, "kl": 0.28515625, "learning_rate": 2.883694171583094e-07, "loss": 0.012544493190944195, "memory(GiB)": 112.53, "reward": 1.4687500596046448, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.4687500149011612, "rewards/CineAccuracyORM/std": 0.4939185827970505, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4313, "train_speed(iter/s)": 0.027327 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.0, "completions/mean_length": 93.38541984558105, "completions/min_length": 43.25, "epoch": 6.428890543559196, "grad_norm": 0.0035255531055796313, "kl": 0.29931640625, "learning_rate": 2.8815517501620356e-07, "loss": 0.00029858629568479955, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4314, "train_speed(iter/s)": 0.027329 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 263.75, "completions/mean_length": 99.04166793823242, "completions/min_length": 28.5, "epoch": 6.430379746835443, "grad_norm": 0.0030471308138059305, "kl": 0.2763671875, "learning_rate": 2.879409802677147e-07, "loss": 0.0002760868810582906, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4315, "train_speed(iter/s)": 0.027326 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.0, "completions/mean_length": 96.25000190734863, "completions/min_length": 37.75, "epoch": 6.43186895011169, "grad_norm": 0.002924851540528351, "kl": 0.2822265625, "learning_rate": 2.8772683296076194e-07, "loss": 0.0002823550021275878, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4316, "train_speed(iter/s)": 0.027326 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 225.25, "completions/mean_length": 95.43750190734863, "completions/min_length": 38.75, "epoch": 6.433358153387937, "grad_norm": 0.0030087509421002274, "kl": 0.27783203125, "learning_rate": 2.875127331432545e-07, "loss": 0.0002782190858852118, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4317, "train_speed(iter/s)": 0.027326 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.0, "completions/mean_length": 93.30208587646484, "completions/min_length": 41.0, "epoch": 6.434847356664185, "grad_norm": 0.0032306927306688573, "kl": 0.28662109375, "learning_rate": 2.8729868086308995e-07, "loss": 0.0002868513111025095, "memory(GiB)": 112.53, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4318, "train_speed(iter/s)": 0.027323 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 252.25, "completions/mean_length": 102.04166984558105, "completions/min_length": 49.0, "epoch": 6.436336559940432, "grad_norm": 1.6161850228040753, "kl": 0.30517578125, "learning_rate": 2.87084676168156e-07, "loss": -0.005621693097054958, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.2397102490067482, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4319, "train_speed(iter/s)": 0.027322 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.75, "completions/mean_length": 90.42708778381348, "completions/min_length": 34.25, "epoch": 6.437825763216679, "grad_norm": 1.0677299254833472, "kl": 0.306640625, "learning_rate": 2.8687071910632957e-07, "loss": 0.02804006263613701, "memory(GiB)": 112.53, "reward": 1.822916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4320, "train_speed(iter/s)": 0.027322 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 208.0, "completions/mean_length": 97.29166984558105, "completions/min_length": 35.0, "epoch": 6.439314966492926, "grad_norm": 0.003993114657813716, "kl": 0.28173828125, "learning_rate": 2.866568097254765e-07, "loss": 0.00028210473828949034, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4321, "train_speed(iter/s)": 0.027323 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.25, "completions/mean_length": 99.30208396911621, "completions/min_length": 41.25, "epoch": 6.440804169769174, "grad_norm": 0.1808640446715514, "kl": 0.3828125, "learning_rate": 2.8644294807345286e-07, "loss": 0.00038273027166724205, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4322, "train_speed(iter/s)": 0.02732 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.0, "completions/mean_length": 93.35416984558105, "completions/min_length": 40.5, "epoch": 6.442293373045421, "grad_norm": 0.0033910223903492725, "kl": 0.2958984375, "learning_rate": 2.8622913419810266e-07, "loss": 0.00029618010739795864, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4323, "train_speed(iter/s)": 0.027317 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 195.25, "completions/mean_length": 90.40625381469727, "completions/min_length": 37.75, "epoch": 6.443782576321668, "grad_norm": 0.003322874720332202, "kl": 0.30517578125, "learning_rate": 2.860153681472608e-07, "loss": 0.00030504792812280357, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4324, "train_speed(iter/s)": 0.027317 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 251.25, "completions/mean_length": 98.18750190734863, "completions/min_length": 45.75, "epoch": 6.445271779597915, "grad_norm": 0.0036270857960333173, "kl": 0.29052734375, "learning_rate": 2.858016499687503e-07, "loss": 0.000290584284812212, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4325, "train_speed(iter/s)": 0.027319 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 235.25, "completions/mean_length": 97.86458778381348, "completions/min_length": 36.0, "epoch": 6.446760982874162, "grad_norm": 1.8161954582086146, "kl": 0.28466796875, "learning_rate": 2.8558797971038396e-07, "loss": 0.04063864052295685, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.822916679084301, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4326, "train_speed(iter/s)": 0.027321 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.5, "completions/mean_length": 94.32292175292969, "completions/min_length": 34.0, "epoch": 6.44825018615041, "grad_norm": 0.0029995883070773194, "kl": 0.285400390625, "learning_rate": 2.8537435741996375e-07, "loss": 0.0002849758311640471, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4327, "train_speed(iter/s)": 0.027318 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.25, "completions/mean_length": 83.70833587646484, "completions/min_length": 41.0, "epoch": 6.449739389426656, "grad_norm": 0.0030852447842757117, "kl": 0.30810546875, "learning_rate": 2.851607831452808e-07, "loss": 0.0003077650035265833, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4328, "train_speed(iter/s)": 0.027317 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.5, "completions/mean_length": 97.21875381469727, "completions/min_length": 49.5, "epoch": 6.451228592702904, "grad_norm": 0.003220033158131192, "kl": 0.263671875, "learning_rate": 2.8494725693411605e-07, "loss": 0.0002638135338202119, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4329, "train_speed(iter/s)": 0.027319 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.0, "completions/mean_length": 100.71875190734863, "completions/min_length": 34.25, "epoch": 6.452717795979151, "grad_norm": 1.6488797232902213, "kl": 0.2802734375, "learning_rate": 2.8473377883423897e-07, "loss": -0.0003456429985817522, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4330, "train_speed(iter/s)": 0.027315 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.75, "completions/mean_length": 100.67708587646484, "completions/min_length": 37.0, "epoch": 6.4542069992553985, "grad_norm": 0.004798603017239958, "kl": 0.27685546875, "learning_rate": 2.845203488934087e-07, "loss": 0.00027699617203325033, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4331, "train_speed(iter/s)": 0.027317 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 195.25, "completions/mean_length": 88.48958396911621, "completions/min_length": 32.0, "epoch": 6.455696202531645, "grad_norm": 0.0031519932436285965, "kl": 0.3056640625, "learning_rate": 2.8430696715937337e-07, "loss": 0.0003058405127376318, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4332, "train_speed(iter/s)": 0.027314 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 290.25, "completions/mean_length": 95.67708587646484, "completions/min_length": 40.0, "epoch": 6.457185405807893, "grad_norm": 0.0030413287709142437, "kl": 0.30322265625, "learning_rate": 2.8409363367987025e-07, "loss": 0.000303119421005249, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4333, "train_speed(iter/s)": 0.027312 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.5, "completions/mean_length": 89.39583587646484, "completions/min_length": 41.75, "epoch": 6.45867460908414, "grad_norm": 0.003708854171828346, "kl": 0.28955078125, "learning_rate": 2.8388034850262646e-07, "loss": 0.00028976728208363056, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4334, "train_speed(iter/s)": 0.027311 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 214.75, "completions/mean_length": 91.27083587646484, "completions/min_length": 43.5, "epoch": 6.4601638123603875, "grad_norm": 0.0035286991742619392, "kl": 0.29638671875, "learning_rate": 2.8366711167535763e-07, "loss": 0.00029597949469462037, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4335, "train_speed(iter/s)": 0.027311 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.5, "completions/mean_length": 92.57291984558105, "completions/min_length": 34.75, "epoch": 6.461653015636634, "grad_norm": 0.0033748031467964934, "kl": 0.29541015625, "learning_rate": 2.834539232457687e-07, "loss": 0.00029504450503736734, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4336, "train_speed(iter/s)": 0.027313 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.25, "completions/mean_length": 108.28125381469727, "completions/min_length": 44.75, "epoch": 6.463142218912882, "grad_norm": 0.0030929543288746894, "kl": 0.27392578125, "learning_rate": 2.83240783261554e-07, "loss": 0.00027398482779972255, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4337, "train_speed(iter/s)": 0.02731 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 232.5, "completions/mean_length": 102.44791793823242, "completions/min_length": 41.0, "epoch": 6.464631422189129, "grad_norm": 1.7104863469327376, "kl": 0.274658203125, "learning_rate": 2.830276917703968e-07, "loss": 0.024043826386332512, "memory(GiB)": 112.53, "reward": 1.6354167461395264, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6354166865348816, "rewards/CineAccuracyORM/std": 0.4884019047021866, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4338, "train_speed(iter/s)": 0.027312 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.25, "completions/mean_length": 98.59375381469727, "completions/min_length": 43.25, "epoch": 6.466120625465376, "grad_norm": 0.6330436158075221, "kl": 0.55322265625, "learning_rate": 2.828146488199695e-07, "loss": -0.005665632896125317, "memory(GiB)": 112.53, "reward": 1.71875, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.71875, "rewards/CineAccuracyORM/std": 0.08445799350738525, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4339, "train_speed(iter/s)": 0.027314 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 214.0, "completions/mean_length": 88.41666984558105, "completions/min_length": 39.25, "epoch": 6.467609828741623, "grad_norm": 0.006929860867968473, "kl": 0.3046875, "learning_rate": 2.8260165445793414e-07, "loss": 0.00030515098478645086, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4340, "train_speed(iter/s)": 0.027313 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.0, "completions/mean_length": 99.64583396911621, "completions/min_length": 36.75, "epoch": 6.46909903201787, "grad_norm": 0.059878099079475416, "kl": 0.28466796875, "learning_rate": 2.8238870873194134e-07, "loss": 0.00028438621666282415, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4341, "train_speed(iter/s)": 0.027311 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 233.5, "completions/mean_length": 101.58333396911621, "completions/min_length": 45.0, "epoch": 6.470588235294118, "grad_norm": 1.0720980268983957, "kl": 0.265625, "learning_rate": 2.821758116896309e-07, "loss": 0.00998102966696024, "memory(GiB)": 112.53, "reward": 1.8125000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000074505806, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4342, "train_speed(iter/s)": 0.027309 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 195.0, "completions/mean_length": 92.71875190734863, "completions/min_length": 41.75, "epoch": 6.4720774385703645, "grad_norm": 0.0030237263347171785, "kl": 0.295654296875, "learning_rate": 2.819629633786319e-07, "loss": 0.00029524846468120813, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4343, "train_speed(iter/s)": 0.02731 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.5, "completions/mean_length": 90.82291793823242, "completions/min_length": 44.25, "epoch": 6.473566641846612, "grad_norm": 1.8071448155630117, "kl": 0.29345703125, "learning_rate": 2.8175016384656226e-07, "loss": 0.025665132328867912, "memory(GiB)": 112.53, "reward": 1.6250000298023224, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.6250000074505806, "rewards/CineAccuracyORM/std": 0.3477324768900871, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4344, "train_speed(iter/s)": 0.027307 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.25, "completions/mean_length": 90.88541984558105, "completions/min_length": 45.25, "epoch": 6.475055845122859, "grad_norm": 0.011085924455085465, "kl": 0.3076171875, "learning_rate": 2.815374131410295e-07, "loss": 0.00030726264230906963, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4345, "train_speed(iter/s)": 0.027306 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.5, "completions/mean_length": 89.14583587646484, "completions/min_length": 42.25, "epoch": 6.476545048399107, "grad_norm": 1.495505741678015, "kl": 0.28857421875, "learning_rate": 2.813247113096299e-07, "loss": 0.002002413384616375, "memory(GiB)": 112.53, "reward": 1.6770834028720856, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.677083358168602, "rewards/CineAccuracyORM/std": 0.47723397612571716, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4346, "train_speed(iter/s)": 0.027307 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 181.25, "completions/mean_length": 85.35416984558105, "completions/min_length": 36.25, "epoch": 6.4780342516753535, "grad_norm": 0.003201302333376885, "kl": 0.31689453125, "learning_rate": 2.811120583999487e-07, "loss": 0.00031670607859268785, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4347, "train_speed(iter/s)": 0.027307 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.5, "completions/mean_length": 101.55208587646484, "completions/min_length": 41.5, "epoch": 6.479523454951601, "grad_norm": 0.7704156113623646, "kl": 0.27880859375, "learning_rate": 2.808994544595602e-07, "loss": 0.016165371984243393, "memory(GiB)": 112.53, "reward": 1.8437500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4348, "train_speed(iter/s)": 0.027308 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.5, "completions/mean_length": 95.89583396911621, "completions/min_length": 40.0, "epoch": 6.481012658227848, "grad_norm": 0.0029966611771614585, "kl": 0.29052734375, "learning_rate": 2.8068689953602774e-07, "loss": 0.0002903067506849766, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4349, "train_speed(iter/s)": 0.02731 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 254.0, "completions/mean_length": 102.27083587646484, "completions/min_length": 40.25, "epoch": 6.482501861504096, "grad_norm": 1.4118613094940786, "kl": 0.27783203125, "learning_rate": 2.804743936769043e-07, "loss": 0.0022627003490924835, "memory(GiB)": 112.53, "reward": 1.5937500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500149011612, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4350, "train_speed(iter/s)": 0.027308 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.25, "completions/mean_length": 86.52083396911621, "completions/min_length": 41.5, "epoch": 6.4839910647803425, "grad_norm": 0.0029488437407455288, "kl": 0.29931640625, "learning_rate": 2.802619369297309e-07, "loss": 0.0002990323700942099, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4351, "train_speed(iter/s)": 0.027309 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.0, "completions/mean_length": 96.23958587646484, "completions/min_length": 35.75, "epoch": 6.485480268056589, "grad_norm": 0.006598579617782723, "kl": 0.268310546875, "learning_rate": 2.8004952934203837e-07, "loss": 0.0002683747443370521, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4352, "train_speed(iter/s)": 0.02731 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 237.5, "completions/mean_length": 94.45833587646484, "completions/min_length": 38.75, "epoch": 6.486969471332837, "grad_norm": 0.002900099776909298, "kl": 0.2861328125, "learning_rate": 2.7983717096134603e-07, "loss": 0.0002859504893422127, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4353, "train_speed(iter/s)": 0.027309 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.5, "completions/mean_length": 92.62500190734863, "completions/min_length": 32.0, "epoch": 6.488458674609084, "grad_norm": 1.8708260808647657, "kl": 0.28759765625, "learning_rate": 2.796248618351622e-07, "loss": 0.015657665207982063, "memory(GiB)": 112.53, "reward": 1.6145833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6145833432674408, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4354, "train_speed(iter/s)": 0.027308 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 270.5, "completions/mean_length": 96.67708587646484, "completions/min_length": 39.75, "epoch": 6.4899478778853315, "grad_norm": 0.002801935656196457, "kl": 0.3017578125, "learning_rate": 2.794126020109851e-07, "loss": 0.00030196807347238064, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4355, "train_speed(iter/s)": 0.027305 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 276.0, "completions/mean_length": 110.69791984558105, "completions/min_length": 45.75, "epoch": 6.491437081161578, "grad_norm": 0.6955120904380081, "kl": 0.258544921875, "learning_rate": 2.792003915363003e-07, "loss": 0.03437924012541771, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833507180214, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4356, "train_speed(iter/s)": 0.027305 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 195.25, "completions/mean_length": 88.54167175292969, "completions/min_length": 41.0, "epoch": 6.492926284437826, "grad_norm": 1.1538517610355814, "kl": 0.30078125, "learning_rate": 2.7898823045858377e-07, "loss": -0.022389722988009453, "memory(GiB)": 112.53, "reward": 1.7604166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4357, "train_speed(iter/s)": 0.027304 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 186.5, "completions/mean_length": 90.14583587646484, "completions/min_length": 41.0, "epoch": 6.494415487714073, "grad_norm": 0.002907627260049251, "kl": 0.28564453125, "learning_rate": 2.7877611882529974e-07, "loss": 0.0002853593323379755, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4358, "train_speed(iter/s)": 0.027304 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.75, "completions/mean_length": 91.53125381469727, "completions/min_length": 39.0, "epoch": 6.49590469099032, "grad_norm": 0.0029261216805300475, "kl": 0.29443359375, "learning_rate": 2.785640566839013e-07, "loss": 0.00029431231087073684, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4359, "train_speed(iter/s)": 0.027306 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.25, "completions/mean_length": 87.88541984558105, "completions/min_length": 31.25, "epoch": 6.497393894266567, "grad_norm": 1.9516522156682807, "kl": 0.2958984375, "learning_rate": 2.7835204408183133e-07, "loss": 0.0032836589962244034, "memory(GiB)": 112.53, "reward": 1.8958333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4360, "train_speed(iter/s)": 0.027307 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 201.75, "completions/mean_length": 86.04166984558105, "completions/min_length": 32.0, "epoch": 6.498883097542815, "grad_norm": 0.003372437000975152, "kl": 0.32861328125, "learning_rate": 2.781400810665201e-07, "loss": 0.000328647845890373, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4361, "train_speed(iter/s)": 0.027307 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.25, "completions/mean_length": 94.52083587646484, "completions/min_length": 43.0, "epoch": 6.500372300819062, "grad_norm": 3.696875582274483, "kl": 0.302001953125, "learning_rate": 2.779281676853883e-07, "loss": 0.008041037246584892, "memory(GiB)": 112.53, "reward": 1.9583333432674408, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.9583333432674408, "rewards/CineAccuracyORM/std": 0.09517337381839752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4362, "train_speed(iter/s)": 0.027308 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.5, "completions/mean_length": 83.33333778381348, "completions/min_length": 37.75, "epoch": 6.501861504095309, "grad_norm": 2.7997713063203973, "kl": 0.3037109375, "learning_rate": 2.7771630398584477e-07, "loss": -0.0012374179204925895, "memory(GiB)": 112.53, "reward": 1.8958333432674408, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.12590253353118896, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4363, "train_speed(iter/s)": 0.02731 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 206.5, "completions/mean_length": 92.87500381469727, "completions/min_length": 39.75, "epoch": 6.503350707371556, "grad_norm": 0.008503002627496986, "kl": 0.29638671875, "learning_rate": 2.775044900152873e-07, "loss": 0.00029657132108695805, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4364, "train_speed(iter/s)": 0.02731 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.25, "completions/mean_length": 83.03125190734863, "completions/min_length": 32.25, "epoch": 6.504839910647803, "grad_norm": 0.003355211793582003, "kl": 0.30126953125, "learning_rate": 2.772927258211025e-07, "loss": 0.00030053683440200984, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4365, "train_speed(iter/s)": 0.027311 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 201.5, "completions/mean_length": 84.46875190734863, "completions/min_length": 40.0, "epoch": 6.506329113924051, "grad_norm": 0.0028550345891865626, "kl": 0.29931640625, "learning_rate": 2.7708101145066584e-07, "loss": 0.0002997115661855787, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4366, "train_speed(iter/s)": 0.027307 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.0, "completions/mean_length": 89.80208587646484, "completions/min_length": 35.25, "epoch": 6.5078183172002975, "grad_norm": 1.0822913868653197, "kl": 0.27587890625, "learning_rate": 2.7686934695134233e-07, "loss": 0.007887417450547218, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833507180214, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4367, "train_speed(iter/s)": 0.027307 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.0, "completions/mean_length": 103.29166984558105, "completions/min_length": 35.75, "epoch": 6.509307520476545, "grad_norm": 1.3999645076177485, "kl": 0.27734375, "learning_rate": 2.766577323704845e-07, "loss": 0.006924870889633894, "memory(GiB)": 112.53, "reward": 1.8645833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8645833432674408, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4368, "train_speed(iter/s)": 0.027304 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.75, "completions/mean_length": 92.08333396911621, "completions/min_length": 45.0, "epoch": 6.510796723752792, "grad_norm": 2.60069138473036, "kl": 0.327392578125, "learning_rate": 2.76446167755435e-07, "loss": 0.007177487015724182, "memory(GiB)": 112.53, "reward": 1.9375000298023224, "reward_std": 0.08311938121914864, "rewards/CineAccuracyORM/mean": 0.9375000149011612, "rewards/CineAccuracyORM/std": 0.16575583815574646, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4369, "train_speed(iter/s)": 0.027305 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.5, "completions/mean_length": 90.59375381469727, "completions/min_length": 37.5, "epoch": 6.51228592702904, "grad_norm": 0.003125459082052267, "kl": 0.28515625, "learning_rate": 2.762346531535246e-07, "loss": 0.0002855598577298224, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4370, "train_speed(iter/s)": 0.027305 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 214.25, "completions/mean_length": 90.96875190734863, "completions/min_length": 39.0, "epoch": 6.513775130305286, "grad_norm": 0.002954692787135967, "kl": 0.271484375, "learning_rate": 2.7602318861207276e-07, "loss": 0.0002713238063734025, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4371, "train_speed(iter/s)": 0.027307 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 194.5, "completions/mean_length": 88.78125190734863, "completions/min_length": 39.5, "epoch": 6.515264333581534, "grad_norm": 0.7749490126405929, "kl": 0.29638671875, "learning_rate": 2.758117741783887e-07, "loss": 0.024549899622797966, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4372, "train_speed(iter/s)": 0.027306 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 287.75, "completions/mean_length": 95.34375381469727, "completions/min_length": 35.75, "epoch": 6.516753536857781, "grad_norm": 0.002893224665447984, "kl": 0.30419921875, "learning_rate": 2.756004098997689e-07, "loss": 0.0003043925389647484, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4373, "train_speed(iter/s)": 0.027304 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.5, "completions/mean_length": 93.97916984558105, "completions/min_length": 41.75, "epoch": 6.518242740134029, "grad_norm": 1.7032128784340106, "kl": 0.30224609375, "learning_rate": 2.7538909582350013e-07, "loss": 0.00246944441460073, "memory(GiB)": 112.53, "reward": 1.8645833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8645833432674408, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4374, "train_speed(iter/s)": 0.027306 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 201.75, "completions/mean_length": 91.91666793823242, "completions/min_length": 41.0, "epoch": 6.519731943410275, "grad_norm": 0.0029302250644755827, "kl": 0.2822265625, "learning_rate": 2.7517783199685696e-07, "loss": 0.0002818944922182709, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4375, "train_speed(iter/s)": 0.027305 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.5, "completions/mean_length": 79.35416793823242, "completions/min_length": 42.75, "epoch": 6.521221146686523, "grad_norm": 2.327845552833535, "kl": 0.30419921875, "learning_rate": 2.749666184671032e-07, "loss": 0.008588230237364769, "memory(GiB)": 112.53, "reward": 1.760416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166865348816, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4376, "train_speed(iter/s)": 0.027304 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.25, "completions/mean_length": 87.95833778381348, "completions/min_length": 38.0, "epoch": 6.52271034996277, "grad_norm": 0.003079159063094382, "kl": 0.302734375, "learning_rate": 2.7475545528149106e-07, "loss": 0.00030252663418650627, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4377, "train_speed(iter/s)": 0.027302 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 191.25, "completions/mean_length": 85.73958396911621, "completions/min_length": 40.75, "epoch": 6.524199553239017, "grad_norm": 0.003479941498161238, "kl": 0.29736328125, "learning_rate": 2.745443424872615e-07, "loss": 0.0002970668429043144, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4378, "train_speed(iter/s)": 0.027303 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 195.5, "completions/mean_length": 91.52083587646484, "completions/min_length": 42.25, "epoch": 6.525688756515264, "grad_norm": 0.8583536610354205, "kl": 0.28662109375, "learning_rate": 2.7433328013164494e-07, "loss": -0.012919828295707703, "memory(GiB)": 112.53, "reward": 1.635416716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.635416679084301, "rewards/CineAccuracyORM/std": 0.4456155598163605, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4379, "train_speed(iter/s)": 0.027302 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.5, "completions/mean_length": 83.70833587646484, "completions/min_length": 33.5, "epoch": 6.527177959791511, "grad_norm": 0.0030631515919804504, "kl": 0.3408203125, "learning_rate": 2.7412226826185954e-07, "loss": 0.0003407238982617855, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4380, "train_speed(iter/s)": 0.027299 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 255.25, "completions/mean_length": 97.68750381469727, "completions/min_length": 47.25, "epoch": 6.528667163067759, "grad_norm": 0.77088542348887, "kl": 0.281494140625, "learning_rate": 2.739113069251126e-07, "loss": -0.0033253133296966553, "memory(GiB)": 112.53, "reward": 1.7395834028720856, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.739583358168602, "rewards/CineAccuracyORM/std": 0.4121886007487774, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4381, "train_speed(iter/s)": 0.027294 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 220.5, "completions/mean_length": 88.06250190734863, "completions/min_length": 38.25, "epoch": 6.530156366344006, "grad_norm": 0.003541318984296092, "kl": 0.30078125, "learning_rate": 2.737003961686e-07, "loss": 0.00030060610151849687, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4382, "train_speed(iter/s)": 0.027294 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.0, "completions/mean_length": 91.42708778381348, "completions/min_length": 36.0, "epoch": 6.531645569620253, "grad_norm": 1.8559133731540391, "kl": 0.2822265625, "learning_rate": 2.734895360395066e-07, "loss": -0.02406392991542816, "memory(GiB)": 112.53, "reward": 1.697916716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.697916679084301, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4383, "train_speed(iter/s)": 0.027293 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 229.25, "completions/mean_length": 91.37500381469727, "completions/min_length": 42.0, "epoch": 6.5331347728965, "grad_norm": 0.0030693762572627814, "kl": 0.31689453125, "learning_rate": 2.732787265850057e-07, "loss": 0.00031689938623458147, "memory(GiB)": 112.53, "reward": 1.2500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.2500000074505806, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4384, "train_speed(iter/s)": 0.027293 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 251.75, "completions/mean_length": 86.72916793823242, "completions/min_length": 36.5, "epoch": 6.534623976172748, "grad_norm": 1.169697953947422, "kl": 0.3310546875, "learning_rate": 2.7306796785225917e-07, "loss": -0.0003109381068497896, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833386108279, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4385, "train_speed(iter/s)": 0.027293 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 234.5, "completions/mean_length": 91.53125190734863, "completions/min_length": 35.5, "epoch": 6.536113179448995, "grad_norm": 1.970873747608096, "kl": 0.2919921875, "learning_rate": 2.7285725988841774e-07, "loss": 0.009537136182188988, "memory(GiB)": 112.53, "reward": 1.5937500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500149011612, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4386, "train_speed(iter/s)": 0.027291 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.25, "completions/mean_length": 86.90625190734863, "completions/min_length": 33.25, "epoch": 6.537602382725242, "grad_norm": 0.003124395325878449, "kl": 0.294189453125, "learning_rate": 2.726466027406204e-07, "loss": 0.0002941591083072126, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4387, "train_speed(iter/s)": 0.027289 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.0, "completions/mean_length": 92.81250190734863, "completions/min_length": 42.5, "epoch": 6.539091586001489, "grad_norm": 0.0028474085787506967, "kl": 0.29052734375, "learning_rate": 2.7243599645599574e-07, "loss": 0.00029034010367468, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4388, "train_speed(iter/s)": 0.027291 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 252.5, "completions/mean_length": 106.97917175292969, "completions/min_length": 46.0, "epoch": 6.540580789277737, "grad_norm": 0.0028748751924670955, "kl": 0.264404296875, "learning_rate": 2.7222544108165946e-07, "loss": 0.00026413140585646033, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4389, "train_speed(iter/s)": 0.027292 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 181.25, "completions/mean_length": 93.51041984558105, "completions/min_length": 51.75, "epoch": 6.542069992553984, "grad_norm": 4.109032375579775, "kl": 0.3427734375, "learning_rate": 2.7201493666471733e-07, "loss": -0.013676421716809273, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.09750141017138958, "rewards/CineAccuracyORM/mean": 0.7708333507180214, "rewards/CineAccuracyORM/std": 0.2986612282693386, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4390, "train_speed(iter/s)": 0.027293 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.25, "completions/mean_length": 82.33333778381348, "completions/min_length": 41.5, "epoch": 6.54355919583023, "grad_norm": 0.003364980075288297, "kl": 0.32080078125, "learning_rate": 2.718044832522628e-07, "loss": 0.0003203584346920252, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4391, "train_speed(iter/s)": 0.027295 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 194.0, "completions/mean_length": 95.16666793823242, "completions/min_length": 41.75, "epoch": 6.545048399106478, "grad_norm": 0.0027781952325387185, "kl": 0.2822265625, "learning_rate": 2.715940808913781e-07, "loss": 0.0002823022077791393, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4392, "train_speed(iter/s)": 0.027295 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 240.0, "completions/mean_length": 96.94791984558105, "completions/min_length": 38.5, "epoch": 6.546537602382725, "grad_norm": 2.505715112328988, "kl": 0.29150390625, "learning_rate": 2.713837296291347e-07, "loss": 0.009576157666742802, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.07715167850255966, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3568698540329933, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4393, "train_speed(iter/s)": 0.027295 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.75, "completions/mean_length": 97.41666984558105, "completions/min_length": 45.75, "epoch": 6.5480268056589725, "grad_norm": 2.228228347312636, "kl": 0.26904296875, "learning_rate": 2.7117342951259126e-07, "loss": -0.007784421555697918, "memory(GiB)": 112.53, "reward": 1.5208334028720856, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.5208333507180214, "rewards/CineAccuracyORM/std": 0.4870600998401642, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4394, "train_speed(iter/s)": 0.027295 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.0, "completions/mean_length": 92.21875381469727, "completions/min_length": 46.75, "epoch": 6.549516008935219, "grad_norm": 1.3487779576407946, "kl": 0.30419921875, "learning_rate": 2.7096318058879646e-07, "loss": -0.018936660140752792, "memory(GiB)": 112.53, "reward": 1.9479166865348816, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.9479166716337204, "rewards/CineAccuracyORM/std": 0.1550404578447342, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4395, "train_speed(iter/s)": 0.027293 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 231.0, "completions/mean_length": 95.63541793823242, "completions/min_length": 38.0, "epoch": 6.551005212211467, "grad_norm": 0.00326909497662782, "kl": 0.29150390625, "learning_rate": 2.7075298290478665e-07, "loss": 0.000292045995593071, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4396, "train_speed(iter/s)": 0.027289 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.5, "completions/mean_length": 86.36458587646484, "completions/min_length": 33.0, "epoch": 6.552494415487714, "grad_norm": 1.2889287953938162, "kl": 0.3115234375, "learning_rate": 2.705428365075868e-07, "loss": -0.004374339245259762, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.822916679084301, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4397, "train_speed(iter/s)": 0.02729 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.75, "completions/mean_length": 93.33333587646484, "completions/min_length": 38.75, "epoch": 6.5539836187639615, "grad_norm": 0.6790519478922383, "kl": 1.796875, "learning_rate": 2.7033274144421113e-07, "loss": -0.0031648650765419006, "memory(GiB)": 112.53, "reward": 1.8645833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8645833432674408, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4398, "train_speed(iter/s)": 0.027291 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 191.75, "completions/mean_length": 89.72916984558105, "completions/min_length": 46.5, "epoch": 6.555472822040208, "grad_norm": 0.005048860639480333, "kl": 0.27880859375, "learning_rate": 2.70122697761661e-07, "loss": 0.0002793821622617543, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4399, "train_speed(iter/s)": 0.027291 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.5, "completions/mean_length": 90.21875190734863, "completions/min_length": 35.5, "epoch": 6.556962025316456, "grad_norm": 1.3033531352463965, "kl": 0.29443359375, "learning_rate": 2.699127055069279e-07, "loss": -0.022485755383968353, "memory(GiB)": 112.53, "reward": 1.5104166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5104166716337204, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4400, "train_speed(iter/s)": 0.027291 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.75, "completions/mean_length": 90.08333587646484, "completions/min_length": 38.75, "epoch": 6.558451228592703, "grad_norm": 0.002935344098650266, "kl": 0.29736328125, "learning_rate": 2.697027647269902e-07, "loss": 0.0002977958647534251, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4401, "train_speed(iter/s)": 0.027293 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 206.5, "completions/mean_length": 89.28125381469727, "completions/min_length": 38.25, "epoch": 6.5599404318689505, "grad_norm": 1.9151394983577985, "kl": 0.31494140625, "learning_rate": 2.69492875468816e-07, "loss": 0.00702074496075511, "memory(GiB)": 112.53, "reward": 1.9270833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9270833432674408, "rewards/CineAccuracyORM/std": 0.11607640981674194, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4402, "train_speed(iter/s)": 0.027292 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 248.5, "completions/mean_length": 88.58333587646484, "completions/min_length": 34.25, "epoch": 6.561429635145197, "grad_norm": 0.0029568059862719494, "kl": 0.29931640625, "learning_rate": 2.6928303777936136e-07, "loss": 0.00029923321562819183, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4403, "train_speed(iter/s)": 0.027294 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.25, "completions/mean_length": 95.14583587646484, "completions/min_length": 42.5, "epoch": 6.562918838421444, "grad_norm": 1.8607686339931997, "kl": 0.27685546875, "learning_rate": 2.690732517055707e-07, "loss": 0.017348742112517357, "memory(GiB)": 112.53, "reward": 1.8750000298023224, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.8750000149011612, "rewards/CineAccuracyORM/std": 0.2558748833835125, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4404, "train_speed(iter/s)": 0.027295 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 243.5, "completions/mean_length": 88.93750381469727, "completions/min_length": 34.0, "epoch": 6.564408041697692, "grad_norm": 1.123800907898283, "kl": 0.31982421875, "learning_rate": 2.6886351729437755e-07, "loss": -0.005659705027937889, "memory(GiB)": 112.53, "reward": 1.822916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4405, "train_speed(iter/s)": 0.027294 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.5, "completions/mean_length": 97.18750381469727, "completions/min_length": 36.25, "epoch": 6.565897244973939, "grad_norm": 0.003183434095665193, "kl": 0.2705078125, "learning_rate": 2.686538345927026e-07, "loss": 0.0002707675448618829, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4406, "train_speed(iter/s)": 0.027296 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.5, "completions/mean_length": 87.21875190734863, "completions/min_length": 40.5, "epoch": 6.567386448250186, "grad_norm": 0.00374328158038668, "kl": 0.28857421875, "learning_rate": 2.684442036474565e-07, "loss": 0.00028800172731280327, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4407, "train_speed(iter/s)": 0.027298 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 266.75, "completions/mean_length": 97.44791984558105, "completions/min_length": 40.25, "epoch": 6.568875651526433, "grad_norm": 0.0029374274659248337, "kl": 0.29345703125, "learning_rate": 2.682346245055372e-07, "loss": 0.00029325007926672697, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4408, "train_speed(iter/s)": 0.027294 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.0, "completions/mean_length": 89.06250190734863, "completions/min_length": 28.0, "epoch": 6.570364854802681, "grad_norm": 0.0038975237469417905, "kl": 0.30419921875, "learning_rate": 2.6802509721383135e-07, "loss": 0.00030500628054142, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4409, "train_speed(iter/s)": 0.027294 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.0, "completions/mean_length": 100.93750190734863, "completions/min_length": 41.0, "epoch": 6.5718540580789275, "grad_norm": 0.0029484524335502147, "kl": 0.27734375, "learning_rate": 2.678156218192147e-07, "loss": 0.000277219049166888, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4410, "train_speed(iter/s)": 0.027293 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.5, "completions/mean_length": 95.25000381469727, "completions/min_length": 40.5, "epoch": 6.573343261355175, "grad_norm": 0.7238371235174542, "kl": 0.28564453125, "learning_rate": 2.6760619836854983e-07, "loss": -0.014181667938828468, "memory(GiB)": 112.53, "reward": 1.84375, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.84375, "rewards/CineAccuracyORM/std": 0.12363383919000626, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4411, "train_speed(iter/s)": 0.027293 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.5, "completions/mean_length": 93.13541793823242, "completions/min_length": 38.5, "epoch": 6.574832464631422, "grad_norm": 0.0030699461266654243, "kl": 0.2724609375, "learning_rate": 2.6739682690868943e-07, "loss": 0.00027243088698014617, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4412, "train_speed(iter/s)": 0.027293 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.25, "completions/mean_length": 91.67708587646484, "completions/min_length": 38.75, "epoch": 6.57632166790767, "grad_norm": 0.0030051250563214067, "kl": 0.293701171875, "learning_rate": 2.6718750748647353e-07, "loss": 0.00029399190680123866, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4413, "train_speed(iter/s)": 0.027295 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 220.0, "completions/mean_length": 100.03125190734863, "completions/min_length": 34.25, "epoch": 6.5778108711839165, "grad_norm": 0.003091117189418328, "kl": 0.261474609375, "learning_rate": 2.669782401487307e-07, "loss": 0.0002606915950309485, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4414, "train_speed(iter/s)": 0.027295 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.5, "completions/mean_length": 96.83333587646484, "completions/min_length": 47.0, "epoch": 6.579300074460164, "grad_norm": 0.0029241593360183147, "kl": 0.28857421875, "learning_rate": 2.6676902494227794e-07, "loss": 0.00028894859133288264, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4415, "train_speed(iter/s)": 0.027294 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.0, "completions/mean_length": 93.58333587646484, "completions/min_length": 35.75, "epoch": 6.580789277736411, "grad_norm": 2.4009406495023913, "kl": 0.299560546875, "learning_rate": 2.665598619139205e-07, "loss": 0.003427250310778618, "memory(GiB)": 112.53, "reward": 1.8750000298023224, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.8750000149011612, "rewards/CineAccuracyORM/std": 0.2155592292547226, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4416, "train_speed(iter/s)": 0.027292 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.75, "completions/mean_length": 89.48958778381348, "completions/min_length": 38.75, "epoch": 6.582278481012658, "grad_norm": 0.0033339081209682853, "kl": 0.29833984375, "learning_rate": 2.663507511104522e-07, "loss": 0.0002981661236844957, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4417, "train_speed(iter/s)": 0.027293 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.5, "completions/mean_length": 85.29166984558105, "completions/min_length": 36.25, "epoch": 6.5837676842889055, "grad_norm": 1.7436767601410281, "kl": 0.31884765625, "learning_rate": 2.661416925786551e-07, "loss": -1.1776263818319421e-05, "memory(GiB)": 112.53, "reward": 1.7395834028720856, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.739583358168602, "rewards/CineAccuracyORM/std": 0.4121886007487774, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4418, "train_speed(iter/s)": 0.027295 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 225.5, "completions/mean_length": 98.05208587646484, "completions/min_length": 31.25, "epoch": 6.585256887565152, "grad_norm": 0.0028633645718598314, "kl": 0.281982421875, "learning_rate": 2.659326863652992e-07, "loss": 0.00028138532070443034, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4419, "train_speed(iter/s)": 0.027293 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 252.5, "completions/mean_length": 95.84375, "completions/min_length": 38.75, "epoch": 6.5867460908414, "grad_norm": 1.6956914066324906, "kl": 0.287109375, "learning_rate": 2.6572373251714327e-07, "loss": 0.008094379678368568, "memory(GiB)": 112.53, "reward": 1.7187500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500074505806, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4420, "train_speed(iter/s)": 0.027292 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.0, "completions/mean_length": 87.10416984558105, "completions/min_length": 43.25, "epoch": 6.588235294117647, "grad_norm": 0.0030568586413949593, "kl": 0.294677734375, "learning_rate": 2.6551483108093373e-07, "loss": 0.00029430576250888407, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4421, "train_speed(iter/s)": 0.027293 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.0, "completions/mean_length": 95.69791984558105, "completions/min_length": 40.5, "epoch": 6.589724497393894, "grad_norm": 0.0032601788194602643, "kl": 0.291259765625, "learning_rate": 2.653059821034064e-07, "loss": 0.0002908636233769357, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4422, "train_speed(iter/s)": 0.027291 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 220.75, "completions/mean_length": 93.68750190734863, "completions/min_length": 38.0, "epoch": 6.591213700670141, "grad_norm": 0.002920153396692502, "kl": 0.2841796875, "learning_rate": 2.6509718563128425e-07, "loss": 0.00028432387625798583, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4423, "train_speed(iter/s)": 0.027293 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 258.75, "completions/mean_length": 98.0625, "completions/min_length": 33.5, "epoch": 6.592702903946389, "grad_norm": 0.002962906142592236, "kl": 0.280517578125, "learning_rate": 2.64888441711279e-07, "loss": 0.00027995993150398135, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4424, "train_speed(iter/s)": 0.02729 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 231.5, "completions/mean_length": 97.70833396911621, "completions/min_length": 45.5, "epoch": 6.594192107222636, "grad_norm": 1.6771083446154234, "kl": 0.29931640625, "learning_rate": 2.646797503900905e-07, "loss": -0.011597171425819397, "memory(GiB)": 112.53, "reward": 1.5416666865348816, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.5416666716337204, "rewards/CineAccuracyORM/std": 0.36846019327640533, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4425, "train_speed(iter/s)": 0.027288 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.25, "completions/mean_length": 92.31250381469727, "completions/min_length": 39.25, "epoch": 6.595681310498883, "grad_norm": 0.00290572800072961, "kl": 0.27294921875, "learning_rate": 2.6447111171440696e-07, "loss": 0.000273534853477031, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4426, "train_speed(iter/s)": 0.027289 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.0, "completions/mean_length": 74.58333778381348, "completions/min_length": 32.0, "epoch": 6.59717051377513, "grad_norm": 1.028855565461544, "kl": 0.58837890625, "learning_rate": 2.6426252573090437e-07, "loss": 0.0039017293602228165, "memory(GiB)": 112.53, "reward": 1.7291666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7291666716337204, "rewards/CineAccuracyORM/std": 0.07058246433734894, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4427, "train_speed(iter/s)": 0.02729 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.75, "completions/mean_length": 96.29166793823242, "completions/min_length": 44.0, "epoch": 6.598659717051378, "grad_norm": 0.0029334209766648162, "kl": 0.27783203125, "learning_rate": 2.640539924862478e-07, "loss": 0.00027811992913484573, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4428, "train_speed(iter/s)": 0.027289 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 265.0, "completions/mean_length": 99.08333587646484, "completions/min_length": 40.75, "epoch": 6.600148920327625, "grad_norm": 0.0026876118297857686, "kl": 0.2587890625, "learning_rate": 2.638455120270898e-07, "loss": 0.00025892030680552125, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4429, "train_speed(iter/s)": 0.02729 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.25, "completions/mean_length": 98.06250381469727, "completions/min_length": 49.5, "epoch": 6.6016381236038715, "grad_norm": 1.2226140182755987, "kl": 0.27490234375, "learning_rate": 2.636370844000713e-07, "loss": -0.01938548870384693, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4430, "train_speed(iter/s)": 0.027288 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 220.0, "completions/mean_length": 97.08333396911621, "completions/min_length": 47.75, "epoch": 6.603127326880119, "grad_norm": 0.0029303127584521605, "kl": 0.2763671875, "learning_rate": 2.634287096518214e-07, "loss": 0.0002763145021162927, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4431, "train_speed(iter/s)": 0.027288 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 220.75, "completions/mean_length": 92.32291793823242, "completions/min_length": 28.25, "epoch": 6.604616530156366, "grad_norm": 0.00305705083684385, "kl": 0.29150390625, "learning_rate": 2.632203878289573e-07, "loss": 0.00029143248684704304, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4432, "train_speed(iter/s)": 0.027289 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.5, "completions/mean_length": 98.71875190734863, "completions/min_length": 45.25, "epoch": 6.606105733432614, "grad_norm": 0.0029650112675256657, "kl": 0.269775390625, "learning_rate": 2.630121189780846e-07, "loss": 0.0002698410244192928, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4433, "train_speed(iter/s)": 0.02729 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 274.75, "completions/mean_length": 87.39583396911621, "completions/min_length": 23.75, "epoch": 6.6075949367088604, "grad_norm": 0.0028164421982501083, "kl": 0.27294921875, "learning_rate": 2.6280390314579705e-07, "loss": 0.0002727326354943216, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4434, "train_speed(iter/s)": 0.027291 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 369.75, "completions/mean_length": 110.67708396911621, "completions/min_length": 43.0, "epoch": 6.609084139985108, "grad_norm": 0.0030352169510540913, "kl": 0.275390625, "learning_rate": 2.625957403786759e-07, "loss": 0.00027493975358083844, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4435, "train_speed(iter/s)": 0.02729 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.75, "completions/mean_length": 91.39583587646484, "completions/min_length": 46.5, "epoch": 6.610573343261355, "grad_norm": 0.0026196763393201623, "kl": 0.2841796875, "learning_rate": 2.6238763072329185e-07, "loss": 0.0002846228308044374, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4436, "train_speed(iter/s)": 0.027291 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 250.5, "completions/mean_length": 94.79166984558105, "completions/min_length": 33.0, "epoch": 6.612062546537603, "grad_norm": 0.033095023907583114, "kl": 0.41357421875, "learning_rate": 2.621795742262019e-07, "loss": 0.00041350640822201967, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4437, "train_speed(iter/s)": 0.027291 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.0, "completions/mean_length": 94.07291793823242, "completions/min_length": 31.75, "epoch": 6.613551749813849, "grad_norm": 0.002981079894826168, "kl": 0.29248046875, "learning_rate": 2.6197157093395307e-07, "loss": 0.00029276940040290356, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4438, "train_speed(iter/s)": 0.027293 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 232.0, "completions/mean_length": 97.12500190734863, "completions/min_length": 39.5, "epoch": 6.615040953090097, "grad_norm": 1.2454947773762057, "kl": 0.278564453125, "learning_rate": 2.617636208930787e-07, "loss": -0.012109587900340557, "memory(GiB)": 112.53, "reward": 1.8125000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000074505806, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4439, "train_speed(iter/s)": 0.027289 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.5, "completions/mean_length": 99.78125190734863, "completions/min_length": 44.25, "epoch": 6.616530156366344, "grad_norm": 1.0453265068210629, "kl": 0.285888671875, "learning_rate": 2.6155572415010167e-07, "loss": -0.00030016733217053115, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.8020833507180214, "rewards/CineAccuracyORM/std": 0.24199935421347618, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4440, "train_speed(iter/s)": 0.027287 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 220.25, "completions/mean_length": 98.91666984558105, "completions/min_length": 36.0, "epoch": 6.618019359642592, "grad_norm": 0.00277947282660101, "kl": 0.2861328125, "learning_rate": 2.613478807515322e-07, "loss": 0.00028645308339037, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4441, "train_speed(iter/s)": 0.027287 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 199.25, "completions/mean_length": 93.59375190734863, "completions/min_length": 42.25, "epoch": 6.619508562918838, "grad_norm": 0.0028568094808205497, "kl": 0.287109375, "learning_rate": 2.6114009074386844e-07, "loss": 0.0002870315802283585, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4442, "train_speed(iter/s)": 0.027289 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.75, "completions/mean_length": 89.72916793823242, "completions/min_length": 37.5, "epoch": 6.620997766195085, "grad_norm": 0.00306418721611864, "kl": 0.314453125, "learning_rate": 2.609323541735975e-07, "loss": 0.00031353242229670286, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4443, "train_speed(iter/s)": 0.027289 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 235.5, "completions/mean_length": 105.22916984558105, "completions/min_length": 48.5, "epoch": 6.622486969471333, "grad_norm": 0.003259427523729484, "kl": 0.28271484375, "learning_rate": 2.6072467108719317e-07, "loss": 0.0002822905080392957, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4444, "train_speed(iter/s)": 0.027287 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 191.25, "completions/mean_length": 91.82291984558105, "completions/min_length": 39.25, "epoch": 6.62397617274758, "grad_norm": 0.6693308097859796, "kl": 0.2978515625, "learning_rate": 2.605170415311184e-07, "loss": -0.005056457594037056, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4445, "train_speed(iter/s)": 0.027288 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.5, "completions/mean_length": 98.71875190734863, "completions/min_length": 43.25, "epoch": 6.625465376023827, "grad_norm": 0.003377992730908324, "kl": 0.29638671875, "learning_rate": 2.6030946555182386e-07, "loss": 0.0002962720755022019, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4446, "train_speed(iter/s)": 0.027289 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 235.5, "completions/mean_length": 96.91666984558105, "completions/min_length": 36.75, "epoch": 6.626954579300074, "grad_norm": 0.0029696366132711907, "kl": 0.291015625, "learning_rate": 2.6010194319574774e-07, "loss": 0.000290901109110564, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4447, "train_speed(iter/s)": 0.027289 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.25, "completions/mean_length": 94.25000381469727, "completions/min_length": 36.5, "epoch": 6.628443782576322, "grad_norm": 0.0026988456372888446, "kl": 0.2861328125, "learning_rate": 2.598944745093173e-07, "loss": 0.00028623826801776886, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4448, "train_speed(iter/s)": 0.027288 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.0, "completions/mean_length": 92.56250381469727, "completions/min_length": 39.0, "epoch": 6.629932985852569, "grad_norm": 0.002746076895129677, "kl": 0.2861328125, "learning_rate": 2.596870595389463e-07, "loss": 0.00028596838819794357, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4449, "train_speed(iter/s)": 0.027286 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.0, "completions/mean_length": 102.14583587646484, "completions/min_length": 35.25, "epoch": 6.631422189128816, "grad_norm": 0.003031274933665671, "kl": 0.2666015625, "learning_rate": 2.59479698331038e-07, "loss": 0.0002672246191650629, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4450, "train_speed(iter/s)": 0.027286 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.0, "completions/mean_length": 101.52083396911621, "completions/min_length": 44.0, "epoch": 6.632911392405063, "grad_norm": 0.003136555029116276, "kl": 0.2724609375, "learning_rate": 2.5927239093198273e-07, "loss": 0.0002727487008087337, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4451, "train_speed(iter/s)": 0.027286 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 234.75, "completions/mean_length": 95.64583587646484, "completions/min_length": 37.5, "epoch": 6.634400595681311, "grad_norm": 0.002563041647271671, "kl": 0.2958984375, "learning_rate": 2.5906513738815893e-07, "loss": 0.0002955043746624142, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4452, "train_speed(iter/s)": 0.027285 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 194.25, "completions/mean_length": 86.13541793823242, "completions/min_length": 30.0, "epoch": 6.635889798957558, "grad_norm": 0.0028654380758074503, "kl": 0.28466796875, "learning_rate": 2.588579377459331e-07, "loss": 0.00028377596754580736, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4453, "train_speed(iter/s)": 0.027287 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.5, "completions/mean_length": 100.66666793823242, "completions/min_length": 38.25, "epoch": 6.637379002233805, "grad_norm": 0.003155144250475114, "kl": 0.29296875, "learning_rate": 2.586507920516595e-07, "loss": 0.0002928536559920758, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4454, "train_speed(iter/s)": 0.027286 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 237.75, "completions/mean_length": 97.31250190734863, "completions/min_length": 41.25, "epoch": 6.638868205510052, "grad_norm": 1.2038119273575445, "kl": 0.31103515625, "learning_rate": 2.584437003516807e-07, "loss": -0.0008526422898285091, "memory(GiB)": 112.53, "reward": 1.8125000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4455, "train_speed(iter/s)": 0.027285 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 257.5, "completions/mean_length": 94.34375190734863, "completions/min_length": 31.75, "epoch": 6.640357408786299, "grad_norm": 0.0033145217863077663, "kl": 0.28564453125, "learning_rate": 2.58236662692327e-07, "loss": 0.0002857876825146377, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4456, "train_speed(iter/s)": 0.027283 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 223.25, "completions/mean_length": 93.44791984558105, "completions/min_length": 37.25, "epoch": 6.641846612062547, "grad_norm": 0.002959340870014335, "kl": 0.2919921875, "learning_rate": 2.5802967911991635e-07, "loss": 0.000291348434984684, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4457, "train_speed(iter/s)": 0.027283 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 279.5, "completions/mean_length": 99.46875190734863, "completions/min_length": 44.75, "epoch": 6.643335815338793, "grad_norm": 0.003050814702935266, "kl": 0.262939453125, "learning_rate": 2.578227496807549e-07, "loss": 0.0002633790427353233, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4458, "train_speed(iter/s)": 0.027285 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.25, "completions/mean_length": 92.30208587646484, "completions/min_length": 29.75, "epoch": 6.644825018615041, "grad_norm": 1.9635135695886772, "kl": 0.3046875, "learning_rate": 2.5761587442113643e-07, "loss": 0.004862673580646515, "memory(GiB)": 112.53, "reward": 1.8437500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4459, "train_speed(iter/s)": 0.027283 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 206.25, "completions/mean_length": 94.79166984558105, "completions/min_length": 46.5, "epoch": 6.646314221891288, "grad_norm": 0.0027793412938336834, "kl": 0.27392578125, "learning_rate": 2.5740905338734306e-07, "loss": 0.0002739334886427969, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4460, "train_speed(iter/s)": 0.027284 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.0, "completions/mean_length": 94.05208587646484, "completions/min_length": 35.25, "epoch": 6.6478034251675355, "grad_norm": 0.002781326293373759, "kl": 0.279296875, "learning_rate": 2.5720228662564446e-07, "loss": 0.00027922747540287673, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4461, "train_speed(iter/s)": 0.027283 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 273.0, "completions/mean_length": 96.85416984558105, "completions/min_length": 39.75, "epoch": 6.649292628443782, "grad_norm": 1.0077276998172506, "kl": 0.306640625, "learning_rate": 2.5699557418229814e-07, "loss": -0.03133203834295273, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4462, "train_speed(iter/s)": 0.027284 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.5, "completions/mean_length": 98.47916984558105, "completions/min_length": 39.5, "epoch": 6.65078183172003, "grad_norm": 0.0027642901669530243, "kl": 0.284423828125, "learning_rate": 2.5678891610354935e-07, "loss": 0.0002848233561962843, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4463, "train_speed(iter/s)": 0.027284 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 281.75, "completions/mean_length": 98.44791984558105, "completions/min_length": 40.0, "epoch": 6.652271034996277, "grad_norm": 0.002742970748020948, "kl": 0.2646484375, "learning_rate": 2.5658231243563154e-07, "loss": 0.0002654577838256955, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4464, "train_speed(iter/s)": 0.027281 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 199.0, "completions/mean_length": 91.83333587646484, "completions/min_length": 30.0, "epoch": 6.6537602382725245, "grad_norm": 0.003129245990803224, "kl": 0.28466796875, "learning_rate": 2.563757632247655e-07, "loss": 0.00028526136884465814, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4465, "train_speed(iter/s)": 0.027282 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.25, "completions/mean_length": 79.29166793823242, "completions/min_length": 34.25, "epoch": 6.655249441548771, "grad_norm": 3.2560240414567945, "kl": 0.30615234375, "learning_rate": 2.561692685171605e-07, "loss": -0.004473460838198662, "memory(GiB)": 112.53, "reward": 1.8437500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4466, "train_speed(iter/s)": 0.027282 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 248.5, "completions/mean_length": 93.09375190734863, "completions/min_length": 36.0, "epoch": 6.656738644825019, "grad_norm": 0.0033871192169052816, "kl": 0.291015625, "learning_rate": 2.559628283590131e-07, "loss": 0.000290472962660715, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4467, "train_speed(iter/s)": 0.027281 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 326.5, "completions/mean_length": 100.94791793823242, "completions/min_length": 46.75, "epoch": 6.658227848101266, "grad_norm": 0.002688390462358015, "kl": 0.28271484375, "learning_rate": 2.5575644279650766e-07, "loss": 0.0002824003458954394, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4468, "train_speed(iter/s)": 0.027279 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 225.25, "completions/mean_length": 111.20833587646484, "completions/min_length": 43.0, "epoch": 6.659717051377513, "grad_norm": 0.0028111308588362575, "kl": 0.27294921875, "learning_rate": 2.555501118758167e-07, "loss": 0.00027319329092279077, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4469, "train_speed(iter/s)": 0.027279 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 179.25, "completions/mean_length": 91.35416984558105, "completions/min_length": 32.25, "epoch": 6.66120625465376, "grad_norm": 0.003036860582403853, "kl": 0.30712890625, "learning_rate": 2.5534383564309976e-07, "loss": 0.00030643987702205777, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4470, "train_speed(iter/s)": 0.027278 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 274.5, "completions/mean_length": 93.90625190734863, "completions/min_length": 37.0, "epoch": 6.662695457930007, "grad_norm": 0.5916125651313782, "kl": 0.296875, "learning_rate": 2.551376141445053e-07, "loss": 0.0017465339042246342, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4471, "train_speed(iter/s)": 0.027275 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.5, "completions/mean_length": 88.25000190734863, "completions/min_length": 41.5, "epoch": 6.664184661206255, "grad_norm": 0.002817731967464739, "kl": 0.30712890625, "learning_rate": 2.5493144742616856e-07, "loss": 0.0003073221887461841, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4472, "train_speed(iter/s)": 0.027272 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.5, "completions/mean_length": 98.88541793823242, "completions/min_length": 43.0, "epoch": 6.6656738644825015, "grad_norm": 0.0029364188869826467, "kl": 0.296875, "learning_rate": 2.54725335534213e-07, "loss": 0.00029658235143870115, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4473, "train_speed(iter/s)": 0.027269 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 277.0, "completions/mean_length": 100.53125381469727, "completions/min_length": 37.0, "epoch": 6.667163067758749, "grad_norm": 0.002745484724995893, "kl": 0.28125, "learning_rate": 2.545192785147495e-07, "loss": 0.00028114073211327195, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4474, "train_speed(iter/s)": 0.027267 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 225.25, "completions/mean_length": 96.31250190734863, "completions/min_length": 44.5, "epoch": 6.668652271034996, "grad_norm": 1.931898389107105, "kl": 0.293212890625, "learning_rate": 2.543132764138768e-07, "loss": 0.03110501915216446, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.08908708393573761, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3432477116584778, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4475, "train_speed(iter/s)": 0.027264 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.25, "completions/mean_length": 91.33333587646484, "completions/min_length": 36.25, "epoch": 6.670141474311244, "grad_norm": 1.2476726283988082, "kl": 0.31640625, "learning_rate": 2.5410732927768185e-07, "loss": 0.004315998870879412, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4476, "train_speed(iter/s)": 0.027261 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.0, "completions/mean_length": 102.07291984558105, "completions/min_length": 38.0, "epoch": 6.6716306775874905, "grad_norm": 0.00289256622559787, "kl": 0.25830078125, "learning_rate": 2.539014371522382e-07, "loss": 0.00025837853900156915, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4477, "train_speed(iter/s)": 0.027262 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 196.0, "completions/mean_length": 92.85416984558105, "completions/min_length": 37.5, "epoch": 6.673119880863738, "grad_norm": 0.0028548953546239076, "kl": 0.2958984375, "learning_rate": 2.536956000836082e-07, "loss": 0.00029541924595832825, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4478, "train_speed(iter/s)": 0.027263 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 237.5, "completions/mean_length": 96.58333396911621, "completions/min_length": 40.0, "epoch": 6.674609084139985, "grad_norm": 0.005595528091471962, "kl": 0.2822265625, "learning_rate": 2.534898181178413e-07, "loss": 0.000282079738099128, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4479, "train_speed(iter/s)": 0.027261 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.0, "completions/mean_length": 87.26042175292969, "completions/min_length": 38.25, "epoch": 6.676098287416233, "grad_norm": 0.011259821044423846, "kl": 0.29833984375, "learning_rate": 2.532840913009745e-07, "loss": 0.00029858978814445436, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4480, "train_speed(iter/s)": 0.027263 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 234.75, "completions/mean_length": 100.68750381469727, "completions/min_length": 38.25, "epoch": 6.6775874906924795, "grad_norm": 1.08537713077612, "kl": 0.273681640625, "learning_rate": 2.530784196790333e-07, "loss": 0.0032861060462892056, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.822916679084301, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4481, "train_speed(iter/s)": 0.027263 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.5, "completions/mean_length": 97.50000381469727, "completions/min_length": 35.5, "epoch": 6.679076693968726, "grad_norm": 0.003493443355258407, "kl": 0.27490234375, "learning_rate": 2.528728032980295e-07, "loss": 0.00027498998679220676, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4482, "train_speed(iter/s)": 0.027262 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 225.0, "completions/mean_length": 103.26041793823242, "completions/min_length": 42.0, "epoch": 6.680565897244974, "grad_norm": 0.0029486065422659746, "kl": 0.265625, "learning_rate": 2.5266724220396384e-07, "loss": 0.00026559396064840257, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4483, "train_speed(iter/s)": 0.027263 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.5, "completions/mean_length": 85.47916793823242, "completions/min_length": 29.0, "epoch": 6.682055100521221, "grad_norm": 1.215203046290731, "kl": 0.33154296875, "learning_rate": 2.52461736442824e-07, "loss": -0.014128229580819607, "memory(GiB)": 112.53, "reward": 1.5729166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5729166716337204, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4484, "train_speed(iter/s)": 0.027259 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.75, "completions/mean_length": 97.93750190734863, "completions/min_length": 36.75, "epoch": 6.6835443037974684, "grad_norm": 0.12342322486989926, "kl": 0.291015625, "learning_rate": 2.5225628606058523e-07, "loss": 0.0002913280331995338, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4485, "train_speed(iter/s)": 0.027257 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.25, "completions/mean_length": 93.61458778381348, "completions/min_length": 32.25, "epoch": 6.685033507073715, "grad_norm": 0.0026839341661133143, "kl": 0.28369140625, "learning_rate": 2.5205089110321116e-07, "loss": 0.00028364418540149927, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4486, "train_speed(iter/s)": 0.027256 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.5, "completions/mean_length": 91.75000381469727, "completions/min_length": 46.0, "epoch": 6.686522710349963, "grad_norm": 0.0038350332296758793, "kl": 0.2900390625, "learning_rate": 2.5184555161665166e-07, "loss": 0.00029068259755149484, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4487, "train_speed(iter/s)": 0.027258 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 214.75, "completions/mean_length": 102.65625190734863, "completions/min_length": 38.5, "epoch": 6.68801191362621, "grad_norm": 0.005003774847958701, "kl": 0.267333984375, "learning_rate": 2.516402676468456e-07, "loss": 0.0002676909789443016, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4488, "train_speed(iter/s)": 0.027258 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 232.5, "completions/mean_length": 92.03125381469727, "completions/min_length": 36.25, "epoch": 6.689501116902457, "grad_norm": 0.003170442397332663, "kl": 0.3076171875, "learning_rate": 2.514350392397186e-07, "loss": 0.00030787562718614936, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4489, "train_speed(iter/s)": 0.027257 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.25, "completions/mean_length": 92.23958587646484, "completions/min_length": 33.0, "epoch": 6.690990320178704, "grad_norm": 0.00273177444555417, "kl": 0.298828125, "learning_rate": 2.5122986644118404e-07, "loss": 0.0002993473317474127, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4490, "train_speed(iter/s)": 0.027256 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 412.75, "completions/mean_length": 104.33333587646484, "completions/min_length": 40.5, "epoch": 6.692479523454952, "grad_norm": 0.0027276029348304916, "kl": 0.26611328125, "learning_rate": 2.5102474929714297e-07, "loss": 0.00026563534629531205, "memory(GiB)": 112.53, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4491, "train_speed(iter/s)": 0.027256 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 263.25, "completions/mean_length": 100.80208587646484, "completions/min_length": 35.25, "epoch": 6.693968726731199, "grad_norm": 0.9256947775196093, "kl": 0.26806640625, "learning_rate": 2.5081968785348363e-07, "loss": 0.021306734532117844, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833507180214, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4492, "train_speed(iter/s)": 0.027255 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 181.5, "completions/mean_length": 91.63541984558105, "completions/min_length": 38.25, "epoch": 6.695457930007446, "grad_norm": 0.002926678384947087, "kl": 0.275634765625, "learning_rate": 2.5061468215608237e-07, "loss": 0.0002753050357569009, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4493, "train_speed(iter/s)": 0.027256 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 179.5, "completions/mean_length": 92.31250190734863, "completions/min_length": 37.75, "epoch": 6.696947133283693, "grad_norm": 0.003021589329039892, "kl": 0.2900390625, "learning_rate": 2.5040973225080276e-07, "loss": 0.00029038178035989404, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4494, "train_speed(iter/s)": 0.027255 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 214.75, "completions/mean_length": 85.61458778381348, "completions/min_length": 37.5, "epoch": 6.69843633655994, "grad_norm": 1.2342185379430433, "kl": 0.31201171875, "learning_rate": 2.502048381834958e-07, "loss": 0.001503773033618927, "memory(GiB)": 112.53, "reward": 1.510416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5104166744276881, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4495, "train_speed(iter/s)": 0.027254 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.75, "completions/mean_length": 84.21875190734863, "completions/min_length": 34.75, "epoch": 6.699925539836188, "grad_norm": 0.002891689756428179, "kl": 0.2998046875, "learning_rate": 2.500000000000001e-07, "loss": 0.000299414386972785, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4496, "train_speed(iter/s)": 0.027256 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 312.25, "completions/mean_length": 92.44792175292969, "completions/min_length": 29.0, "epoch": 6.701414743112435, "grad_norm": 0.7023056627061933, "kl": 0.46240234375, "learning_rate": 2.497952177461415e-07, "loss": -0.003369885263964534, "memory(GiB)": 112.53, "reward": 1.6041666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6041666716337204, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4497, "train_speed(iter/s)": 0.027255 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 204.5, "completions/mean_length": 89.03125381469727, "completions/min_length": 32.5, "epoch": 6.702903946388682, "grad_norm": 0.002956481190913745, "kl": 0.30517578125, "learning_rate": 2.4959049146773417e-07, "loss": 0.0003058900765608996, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4498, "train_speed(iter/s)": 0.027255 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.25, "completions/mean_length": 93.16666793823242, "completions/min_length": 39.75, "epoch": 6.704393149664929, "grad_norm": 0.002989502635448839, "kl": 0.30419921875, "learning_rate": 2.4938582121057876e-07, "loss": 0.0003041219024453312, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4499, "train_speed(iter/s)": 0.027251 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 304.0, "completions/mean_length": 98.87500190734863, "completions/min_length": 35.25, "epoch": 6.705882352941177, "grad_norm": 0.011778339671057294, "kl": 0.29248046875, "learning_rate": 2.491812070204639e-07, "loss": 0.0002924351720139384, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4500, "train_speed(iter/s)": 0.027247 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.75, "completions/mean_length": 94.42708587646484, "completions/min_length": 36.5, "epoch": 6.707371556217423, "grad_norm": 0.7071814678060808, "kl": 0.29736328125, "learning_rate": 2.489766489431656e-07, "loss": -0.012049386277794838, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4501, "train_speed(iter/s)": 0.027236 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 208.5, "completions/mean_length": 94.10416793823242, "completions/min_length": 46.25, "epoch": 6.708860759493671, "grad_norm": 0.0036084923448554147, "kl": 0.2998046875, "learning_rate": 2.4877214702444725e-07, "loss": 0.00029953347984701395, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4502, "train_speed(iter/s)": 0.027237 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 263.5, "completions/mean_length": 96.01041793823242, "completions/min_length": 43.5, "epoch": 6.710349962769918, "grad_norm": 0.0028822099020063795, "kl": 0.27392578125, "learning_rate": 2.485677013100596e-07, "loss": 0.0002737230097409338, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4503, "train_speed(iter/s)": 0.027237 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.75, "completions/mean_length": 100.95833587646484, "completions/min_length": 42.25, "epoch": 6.711839166046166, "grad_norm": 2.550197605837616, "kl": 0.26611328125, "learning_rate": 2.4836331184574125e-07, "loss": -0.0014081664849072695, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4504, "train_speed(iter/s)": 0.027237 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 235.25, "completions/mean_length": 92.66666984558105, "completions/min_length": 35.25, "epoch": 6.713328369322412, "grad_norm": 0.0032085502515428166, "kl": 0.283203125, "learning_rate": 2.481589786772178e-07, "loss": 0.0002828041324391961, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4505, "train_speed(iter/s)": 0.027237 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 220.0, "completions/mean_length": 101.67708587646484, "completions/min_length": 34.0, "epoch": 6.71481757259866, "grad_norm": 0.0028297940681249662, "kl": 0.281494140625, "learning_rate": 2.4795470185020233e-07, "loss": 0.00028111698338761926, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4506, "train_speed(iter/s)": 0.027236 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 235.0, "completions/mean_length": 102.38541984558105, "completions/min_length": 39.5, "epoch": 6.716306775874907, "grad_norm": 0.0033751143117143356, "kl": 0.269287109375, "learning_rate": 2.4775048141039533e-07, "loss": 0.0002691112458705902, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4507, "train_speed(iter/s)": 0.027234 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 204.25, "completions/mean_length": 89.54166984558105, "completions/min_length": 44.25, "epoch": 6.717795979151154, "grad_norm": 0.0030386487345780266, "kl": 0.289306640625, "learning_rate": 2.475463174034845e-07, "loss": 0.0002890498726628721, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4508, "train_speed(iter/s)": 0.027235 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.0, "completions/mean_length": 94.07291793823242, "completions/min_length": 43.0, "epoch": 6.719285182427401, "grad_norm": 2.959087268147368, "kl": 0.2587890625, "learning_rate": 2.473422098751456e-07, "loss": -0.0031665940769016743, "memory(GiB)": 112.53, "reward": 1.7187500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500149011612, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4509, "train_speed(iter/s)": 0.027236 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 238.5, "completions/mean_length": 104.80208587646484, "completions/min_length": 40.25, "epoch": 6.720774385703649, "grad_norm": 0.02762072510067199, "kl": 0.3447265625, "learning_rate": 2.47138158871041e-07, "loss": 0.0003452771343290806, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4510, "train_speed(iter/s)": 0.027236 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 251.75, "completions/mean_length": 101.96875381469727, "completions/min_length": 39.25, "epoch": 6.722263588979896, "grad_norm": 0.0032479757708126235, "kl": 0.29296875, "learning_rate": 2.469341644368207e-07, "loss": 0.0002932466450147331, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4511, "train_speed(iter/s)": 0.027236 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.5, "completions/mean_length": 83.92708396911621, "completions/min_length": 32.75, "epoch": 6.723752792256143, "grad_norm": 0.5055690241193326, "kl": 0.34326171875, "learning_rate": 2.4673022661812204e-07, "loss": -0.0155256949365139, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4512, "train_speed(iter/s)": 0.027233 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.5, "completions/mean_length": 98.17708587646484, "completions/min_length": 38.75, "epoch": 6.72524199553239, "grad_norm": 0.016437165577426, "kl": 0.3232421875, "learning_rate": 2.465263454605695e-07, "loss": 0.00032294070115312934, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4513, "train_speed(iter/s)": 0.027232 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 238.5, "completions/mean_length": 94.20833587646484, "completions/min_length": 40.75, "epoch": 6.726731198808637, "grad_norm": 0.0030055624177529375, "kl": 0.2822265625, "learning_rate": 2.4632252100977564e-07, "loss": 0.0002826155396178365, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4514, "train_speed(iter/s)": 0.027232 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.25, "completions/mean_length": 87.58333778381348, "completions/min_length": 44.0, "epoch": 6.728220402084885, "grad_norm": 0.003151729681184919, "kl": 0.29638671875, "learning_rate": 2.461187533113391e-07, "loss": 0.0002956062671728432, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4515, "train_speed(iter/s)": 0.027232 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.0, "completions/mean_length": 99.64583778381348, "completions/min_length": 39.5, "epoch": 6.729709605361132, "grad_norm": 0.002883164450168325, "kl": 0.29541015625, "learning_rate": 2.459150424108469e-07, "loss": 0.0002951654023490846, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4516, "train_speed(iter/s)": 0.02723 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 258.0, "completions/mean_length": 95.70833778381348, "completions/min_length": 42.75, "epoch": 6.731198808637379, "grad_norm": 0.0029157119575509063, "kl": 0.286865234375, "learning_rate": 2.457113883538729e-07, "loss": 0.00028668108279816806, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4517, "train_speed(iter/s)": 0.027228 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.5, "completions/mean_length": 87.70833587646484, "completions/min_length": 33.5, "epoch": 6.732688011913626, "grad_norm": 2.2407472685758356, "kl": 0.289794921875, "learning_rate": 2.45507791185978e-07, "loss": -0.008397584781050682, "memory(GiB)": 112.53, "reward": 1.8541666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.12590253353118896, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4518, "train_speed(iter/s)": 0.027227 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 279.5, "completions/mean_length": 101.92708587646484, "completions/min_length": 38.0, "epoch": 6.734177215189874, "grad_norm": 0.0041275023224170695, "kl": 0.26806640625, "learning_rate": 2.4530425095271134e-07, "loss": 0.00026855902979150414, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4519, "train_speed(iter/s)": 0.027223 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.5, "completions/mean_length": 97.65625381469727, "completions/min_length": 40.25, "epoch": 6.735666418466121, "grad_norm": 0.0026715428563593383, "kl": 0.27587890625, "learning_rate": 2.451007676996078e-07, "loss": 0.0002755912719294429, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4520, "train_speed(iter/s)": 0.027224 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.5, "completions/mean_length": 96.14583778381348, "completions/min_length": 42.25, "epoch": 6.737155621742367, "grad_norm": 0.003302131815422305, "kl": 0.275634765625, "learning_rate": 2.448973414721909e-07, "loss": 0.00027588309603743255, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4521, "train_speed(iter/s)": 0.027223 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 204.0, "completions/mean_length": 93.88541793823242, "completions/min_length": 44.5, "epoch": 6.738644825018615, "grad_norm": 0.003152667116632738, "kl": 0.28466796875, "learning_rate": 2.4469397231597083e-07, "loss": 0.00028426622156985104, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4522, "train_speed(iter/s)": 0.027223 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.25, "completions/mean_length": 99.58333587646484, "completions/min_length": 38.0, "epoch": 6.740134028294863, "grad_norm": 0.0032004459405741825, "kl": 0.29248046875, "learning_rate": 2.444906602764447e-07, "loss": 0.0002931275521405041, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4523, "train_speed(iter/s)": 0.027222 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.0, "completions/mean_length": 88.15625190734863, "completions/min_length": 38.5, "epoch": 6.7416232315711095, "grad_norm": 0.0034668009970007766, "kl": 0.3193359375, "learning_rate": 2.442874053990978e-07, "loss": 0.0003184039087500423, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4524, "train_speed(iter/s)": 0.02722 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.25, "completions/mean_length": 85.92708587646484, "completions/min_length": 39.25, "epoch": 6.743112434847356, "grad_norm": 0.003154740966483694, "kl": 0.27734375, "learning_rate": 2.4408420772940126e-07, "loss": 0.0002766858960967511, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4525, "train_speed(iter/s)": 0.027221 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 185.75, "completions/mean_length": 85.42708587646484, "completions/min_length": 33.75, "epoch": 6.744601638123604, "grad_norm": 0.0031718918882336564, "kl": 0.3349609375, "learning_rate": 2.438810673128149e-07, "loss": 0.00033497222466394305, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4526, "train_speed(iter/s)": 0.027221 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.25, "completions/mean_length": 86.82291984558105, "completions/min_length": 36.5, "epoch": 6.746090841399851, "grad_norm": 1.3864621187288273, "kl": 0.30322265625, "learning_rate": 2.436779841947843e-07, "loss": -0.014384375885128975, "memory(GiB)": 112.53, "reward": 1.6770833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833358168602, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4527, "train_speed(iter/s)": 0.027222 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.0, "completions/mean_length": 99.41666984558105, "completions/min_length": 40.75, "epoch": 6.7475800446760985, "grad_norm": 0.032816134182611915, "kl": 0.257568359375, "learning_rate": 2.4347495842074343e-07, "loss": 0.0002576567349024117, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4528, "train_speed(iter/s)": 0.027222 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.0, "completions/mean_length": 107.25000381469727, "completions/min_length": 46.25, "epoch": 6.749069247952345, "grad_norm": 1.061814331843663, "kl": 0.26904296875, "learning_rate": 2.432719900361128e-07, "loss": -0.003357013687491417, "memory(GiB)": 112.53, "reward": 1.5104166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.510416679084301, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4529, "train_speed(iter/s)": 0.027221 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.0, "completions/mean_length": 93.37500381469727, "completions/min_length": 35.75, "epoch": 6.750558451228593, "grad_norm": 0.00320428406961185, "kl": 0.30517578125, "learning_rate": 2.4306907908629996e-07, "loss": 0.0003044804325327277, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4530, "train_speed(iter/s)": 0.02722 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 231.5, "completions/mean_length": 92.70833778381348, "completions/min_length": 37.5, "epoch": 6.75204765450484, "grad_norm": 0.004225373334188937, "kl": 0.29833984375, "learning_rate": 2.428662256167004e-07, "loss": 0.00029895338229835033, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4531, "train_speed(iter/s)": 0.027219 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 184.75, "completions/mean_length": 92.19791793823242, "completions/min_length": 33.0, "epoch": 6.7535368577810875, "grad_norm": 2.157349253661769, "kl": 0.29248046875, "learning_rate": 2.426634296726955e-07, "loss": -0.004645837005227804, "memory(GiB)": 112.53, "reward": 1.6979166865348816, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.3546011596918106, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4532, "train_speed(iter/s)": 0.027217 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 303.25, "completions/mean_length": 107.92708778381348, "completions/min_length": 48.5, "epoch": 6.755026061057334, "grad_norm": 0.0031969734864225954, "kl": 0.28857421875, "learning_rate": 2.4246069129965504e-07, "loss": 0.00028833592659793794, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4533, "train_speed(iter/s)": 0.027215 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.5, "completions/mean_length": 104.40625381469727, "completions/min_length": 41.75, "epoch": 6.756515264333581, "grad_norm": 1.3300032448872567, "kl": 0.31640625, "learning_rate": 2.422580105429351e-07, "loss": -0.0015583655331283808, "memory(GiB)": 112.53, "reward": 1.729166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7291666865348816, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4534, "train_speed(iter/s)": 0.027215 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 248.25, "completions/mean_length": 89.53125190734863, "completions/min_length": 40.0, "epoch": 6.758004467609829, "grad_norm": 0.029034872703465893, "kl": 0.31005859375, "learning_rate": 2.4205538744787903e-07, "loss": 0.00030980550218373537, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4535, "train_speed(iter/s)": 0.027213 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.25, "completions/mean_length": 99.07291793823242, "completions/min_length": 41.75, "epoch": 6.759493670886076, "grad_norm": 1.7842475710935124, "kl": 0.27490234375, "learning_rate": 2.4185282205981787e-07, "loss": 0.008066442795097828, "memory(GiB)": 112.53, "reward": 1.9479166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.9479166716337204, "rewards/CineAccuracyORM/std": 0.1037127822637558, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4536, "train_speed(iter/s)": 0.027212 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.5, "completions/mean_length": 105.75000190734863, "completions/min_length": 40.0, "epoch": 6.760982874162323, "grad_norm": 0.0029029585614188677, "kl": 0.26708984375, "learning_rate": 2.416503144240685e-07, "loss": 0.00026708521181717515, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4537, "train_speed(iter/s)": 0.027213 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 253.75, "completions/mean_length": 94.41666984558105, "completions/min_length": 36.75, "epoch": 6.76247207743857, "grad_norm": 0.002712673810442682, "kl": 0.279541015625, "learning_rate": 2.414478645859363e-07, "loss": 0.00027957334532402456, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4538, "train_speed(iter/s)": 0.027214 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.75, "completions/mean_length": 98.34375381469727, "completions/min_length": 38.5, "epoch": 6.763961280714818, "grad_norm": 0.003047985932966119, "kl": 0.267578125, "learning_rate": 2.412454725907127e-07, "loss": 0.00026759383035823703, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4539, "train_speed(iter/s)": 0.027213 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.0, "completions/mean_length": 100.51041793823242, "completions/min_length": 37.25, "epoch": 6.7654504839910645, "grad_norm": 1.4526259808769557, "kl": 0.29443359375, "learning_rate": 2.4104313848367667e-07, "loss": 0.010581713169813156, "memory(GiB)": 112.53, "reward": 1.5312500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.5312500149011612, "rewards/CineAccuracyORM/std": 0.4884019047021866, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4540, "train_speed(iter/s)": 0.027212 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 274.5, "completions/mean_length": 101.23958396911621, "completions/min_length": 52.0, "epoch": 6.766939687267312, "grad_norm": 1.271783868574476, "kl": 0.26416015625, "learning_rate": 2.408408623100939e-07, "loss": -0.0007519781356677413, "memory(GiB)": 112.53, "reward": 1.7395833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4541, "train_speed(iter/s)": 0.027212 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 266.0, "completions/mean_length": 109.27083778381348, "completions/min_length": 46.0, "epoch": 6.768428890543559, "grad_norm": 0.6149609867704278, "kl": 0.267822265625, "learning_rate": 2.406386441152177e-07, "loss": -0.00917092151939869, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833507180214, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4542, "train_speed(iter/s)": 0.027206 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.75, "completions/mean_length": 97.32291984558105, "completions/min_length": 36.75, "epoch": 6.769918093819807, "grad_norm": 0.0031148842128215465, "kl": 0.27197265625, "learning_rate": 2.404364839442878e-07, "loss": 0.00027192558627575636, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4543, "train_speed(iter/s)": 0.027206 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 249.25, "completions/mean_length": 104.42708587646484, "completions/min_length": 42.0, "epoch": 6.7714072970960535, "grad_norm": 1.2404261105430134, "kl": 0.251953125, "learning_rate": 2.4023438184253116e-07, "loss": -0.0068009961396455765, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4544, "train_speed(iter/s)": 0.027206 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 244.0, "completions/mean_length": 108.58333587646484, "completions/min_length": 46.5, "epoch": 6.772896500372301, "grad_norm": 0.013309324087290887, "kl": 0.253662109375, "learning_rate": 2.4003233785516184e-07, "loss": 0.00025374552933499217, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4545, "train_speed(iter/s)": 0.027202 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 246.5, "completions/mean_length": 107.14583587646484, "completions/min_length": 41.75, "epoch": 6.774385703648548, "grad_norm": 1.3418538329133263, "kl": 0.253173828125, "learning_rate": 2.398303520273806e-07, "loss": -0.0017716505099087954, "memory(GiB)": 112.53, "reward": 1.8854166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.20484384894371033, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4546, "train_speed(iter/s)": 0.027204 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 232.0, "completions/mean_length": 99.07291984558105, "completions/min_length": 42.5, "epoch": 6.775874906924795, "grad_norm": 1.0577192268746147, "kl": 0.27685546875, "learning_rate": 2.396284244043758e-07, "loss": -0.031602371484041214, "memory(GiB)": 112.53, "reward": 1.8541666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4547, "train_speed(iter/s)": 0.027203 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.75, "completions/mean_length": 96.63541984558105, "completions/min_length": 37.0, "epoch": 6.7773641102010425, "grad_norm": 0.0034524384742213197, "kl": 0.29052734375, "learning_rate": 2.394265550313222e-07, "loss": 0.0002903323620557785, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4548, "train_speed(iter/s)": 0.027204 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 254.75, "completions/mean_length": 106.00000190734863, "completions/min_length": 36.5, "epoch": 6.77885331347729, "grad_norm": 0.9631648122999836, "kl": 0.271484375, "learning_rate": 2.392247439533818e-07, "loss": 0.012037372216582298, "memory(GiB)": 112.53, "reward": 1.6875000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000149011612, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4549, "train_speed(iter/s)": 0.027203 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 196.0, "completions/mean_length": 90.88541984558105, "completions/min_length": 41.75, "epoch": 6.780342516753537, "grad_norm": 0.003353848859990245, "kl": 0.30615234375, "learning_rate": 2.390229912157033e-07, "loss": 0.0003061332681681961, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4550, "train_speed(iter/s)": 0.027203 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 234.75, "completions/mean_length": 103.39583587646484, "completions/min_length": 39.5, "epoch": 6.781831720029784, "grad_norm": 3.6416213157729667, "kl": 0.26806640625, "learning_rate": 2.388212968634225e-07, "loss": -0.0114898681640625, "memory(GiB)": 112.53, "reward": 1.8645833730697632, "reward_std": 0.07400631904602051, "rewards/CineAccuracyORM/mean": 0.8645833432674408, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4551, "train_speed(iter/s)": 0.027201 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.25, "completions/mean_length": 109.43750190734863, "completions/min_length": 45.5, "epoch": 6.783320923306031, "grad_norm": 0.0029333932339114938, "kl": 0.251220703125, "learning_rate": 2.3861966094166277e-07, "loss": 0.00025133107556030154, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4552, "train_speed(iter/s)": 0.027199 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.75, "completions/mean_length": 95.72916984558105, "completions/min_length": 40.25, "epoch": 6.784810126582278, "grad_norm": 0.004889505061038798, "kl": 0.297119140625, "learning_rate": 2.3841808349553288e-07, "loss": 0.00029661436565220356, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4553, "train_speed(iter/s)": 0.027199 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 258.5, "completions/mean_length": 109.89583587646484, "completions/min_length": 43.25, "epoch": 6.786299329858526, "grad_norm": 0.004292959779409828, "kl": 0.260009765625, "learning_rate": 2.3821656457013012e-07, "loss": 0.00025982450461015105, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4554, "train_speed(iter/s)": 0.027197 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 246.5, "completions/mean_length": 104.79166984558105, "completions/min_length": 40.75, "epoch": 6.787788533134773, "grad_norm": 0.004889137588020556, "kl": 0.26806640625, "learning_rate": 2.3801510421053778e-07, "loss": 0.00026779394829645753, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4555, "train_speed(iter/s)": 0.027195 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 252.75, "completions/mean_length": 114.16666984558105, "completions/min_length": 52.75, "epoch": 6.78927773641102, "grad_norm": 1.037440009952479, "kl": 0.2626953125, "learning_rate": 2.3781370246182612e-07, "loss": -0.007863101549446583, "memory(GiB)": 112.53, "reward": 1.6041667461395264, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6041666865348816, "rewards/CineAccuracyORM/std": 0.4870600998401642, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4556, "train_speed(iter/s)": 0.027193 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.5, "completions/mean_length": 96.78125190734863, "completions/min_length": 42.25, "epoch": 6.790766939687267, "grad_norm": 0.0035530792134467267, "kl": 0.28173828125, "learning_rate": 2.3761235936905305e-07, "loss": 0.00028159483917988837, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4557, "train_speed(iter/s)": 0.027193 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 233.0, "completions/mean_length": 112.97916793823242, "completions/min_length": 44.0, "epoch": 6.792256142963515, "grad_norm": 0.0030362586711687807, "kl": 0.26611328125, "learning_rate": 2.3741107497726192e-07, "loss": 0.0002663353516254574, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4558, "train_speed(iter/s)": 0.027191 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 260.25, "completions/mean_length": 106.31250381469727, "completions/min_length": 46.0, "epoch": 6.793745346239762, "grad_norm": 1.08282646382361, "kl": 0.26171875, "learning_rate": 2.372098493314844e-07, "loss": -0.020861107856035233, "memory(GiB)": 112.53, "reward": 1.8437500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4559, "train_speed(iter/s)": 0.027191 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 241.75, "completions/mean_length": 107.06250190734863, "completions/min_length": 46.75, "epoch": 6.7952345495160085, "grad_norm": 0.003435597478011526, "kl": 0.27685546875, "learning_rate": 2.3700868247673821e-07, "loss": 0.00027619977481663227, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4560, "train_speed(iter/s)": 0.02719 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 345.25, "completions/mean_length": 117.97917175292969, "completions/min_length": 47.75, "epoch": 6.796723752792256, "grad_norm": 0.003432988964411378, "kl": 0.251220703125, "learning_rate": 2.3680757445802785e-07, "loss": 0.00025077519239857793, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4561, "train_speed(iter/s)": 0.027188 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 208.5, "completions/mean_length": 93.40625381469727, "completions/min_length": 33.75, "epoch": 6.798212956068504, "grad_norm": 0.0031270517187616483, "kl": 0.26708984375, "learning_rate": 2.3660652532034558e-07, "loss": 0.0002671775291673839, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4562, "train_speed(iter/s)": 0.027187 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 234.75, "completions/mean_length": 103.62500190734863, "completions/min_length": 42.0, "epoch": 6.799702159344751, "grad_norm": 0.0030605766331273853, "kl": 0.27392578125, "learning_rate": 2.3640553510866902e-07, "loss": 0.00027399405371397734, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4563, "train_speed(iter/s)": 0.027187 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 214.5, "completions/mean_length": 97.29166793823242, "completions/min_length": 47.5, "epoch": 6.801191362620997, "grad_norm": 0.004636110832875352, "kl": 0.27880859375, "learning_rate": 2.3620460386796414e-07, "loss": 0.0002790499711409211, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4564, "train_speed(iter/s)": 0.027187 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 186.5, "completions/mean_length": 98.26041793823242, "completions/min_length": 45.0, "epoch": 6.802680565897245, "grad_norm": 0.004817984229358653, "kl": 0.3173828125, "learning_rate": 2.3600373164318227e-07, "loss": 0.0003171769785694778, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4565, "train_speed(iter/s)": 0.027186 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 236.5, "completions/mean_length": 101.38541793823242, "completions/min_length": 38.0, "epoch": 6.804169769173492, "grad_norm": 0.005881602511670818, "kl": 0.2900390625, "learning_rate": 2.358029184792628e-07, "loss": 0.0002899859973695129, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4566, "train_speed(iter/s)": 0.027186 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 208.5, "completions/mean_length": 93.22916984558105, "completions/min_length": 31.0, "epoch": 6.80565897244974, "grad_norm": 0.003190153843980149, "kl": 0.29541015625, "learning_rate": 2.3560216442113118e-07, "loss": 0.00029474313487298787, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4567, "train_speed(iter/s)": 0.027185 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.25, "completions/mean_length": 113.98958587646484, "completions/min_length": 47.0, "epoch": 6.807148175725986, "grad_norm": 1.037750786577293, "kl": 0.234375, "learning_rate": 2.3540146951369966e-07, "loss": 0.0007427702657878399, "memory(GiB)": 112.53, "reward": 1.8750000298023224, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.8750000149011612, "rewards/CineAccuracyORM/std": 0.2155592292547226, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4568, "train_speed(iter/s)": 0.027186 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 243.5, "completions/mean_length": 99.48958587646484, "completions/min_length": 39.25, "epoch": 6.808637379002234, "grad_norm": 0.003488440279399636, "kl": 0.27880859375, "learning_rate": 2.352008338018679e-07, "loss": 0.0002787226112559438, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4569, "train_speed(iter/s)": 0.027184 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 281.25, "completions/mean_length": 112.43750190734863, "completions/min_length": 43.75, "epoch": 6.810126582278481, "grad_norm": 0.6574772995184217, "kl": 0.2734375, "learning_rate": 2.350002573305211e-07, "loss": -0.0048774536699056625, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4570, "train_speed(iter/s)": 0.027181 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.5, "completions/mean_length": 94.19791984558105, "completions/min_length": 44.0, "epoch": 6.811615785554729, "grad_norm": 1.2677484121896763, "kl": 0.29052734375, "learning_rate": 2.347997401445325e-07, "loss": 0.017269890755414963, "memory(GiB)": 112.53, "reward": 1.9270833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9270833432674408, "rewards/CineAccuracyORM/std": 0.11607640981674194, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4571, "train_speed(iter/s)": 0.027182 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.25, "completions/mean_length": 98.53125190734863, "completions/min_length": 44.75, "epoch": 6.813104988830975, "grad_norm": 0.002944414406612727, "kl": 0.2783203125, "learning_rate": 2.345992822887614e-07, "loss": 0.00027807813603430986, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4572, "train_speed(iter/s)": 0.027184 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 252.75, "completions/mean_length": 107.59375190734863, "completions/min_length": 43.75, "epoch": 6.814594192107222, "grad_norm": 0.002993905975022898, "kl": 0.2587890625, "learning_rate": 2.3439888380805371e-07, "loss": 0.00025856064166873693, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4573, "train_speed(iter/s)": 0.027184 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.25, "completions/mean_length": 90.94791984558105, "completions/min_length": 34.5, "epoch": 6.81608339538347, "grad_norm": 0.0031808940947959845, "kl": 0.291015625, "learning_rate": 2.341985447472428e-07, "loss": 0.0002912945346906781, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4574, "train_speed(iter/s)": 0.027184 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 271.0, "completions/mean_length": 117.93750381469727, "completions/min_length": 45.75, "epoch": 6.8175725986597175, "grad_norm": 0.002823097758521042, "kl": 0.255126953125, "learning_rate": 2.3399826515114756e-07, "loss": 0.0002550197532400489, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4575, "train_speed(iter/s)": 0.027184 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 253.0, "completions/mean_length": 109.81250190734863, "completions/min_length": 47.5, "epoch": 6.819061801935964, "grad_norm": 0.003467435202027793, "kl": 0.2734375, "learning_rate": 2.337980450645748e-07, "loss": 0.00027378456434234977, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4576, "train_speed(iter/s)": 0.027181 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 265.25, "completions/mean_length": 114.76041984558105, "completions/min_length": 46.25, "epoch": 6.820551005212211, "grad_norm": 0.003757670959947024, "kl": 0.27783203125, "learning_rate": 2.3359788453231722e-07, "loss": 0.0002778442285489291, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4577, "train_speed(iter/s)": 0.027178 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 253.5, "completions/mean_length": 117.70833778381348, "completions/min_length": 46.75, "epoch": 6.822040208488459, "grad_norm": 0.007611992809795969, "kl": 0.263427734375, "learning_rate": 2.3339778359915446e-07, "loss": 0.00026310328394174576, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4578, "train_speed(iter/s)": 0.027179 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 241.0, "completions/mean_length": 106.40625381469727, "completions/min_length": 46.75, "epoch": 6.823529411764706, "grad_norm": 0.8175440169847106, "kl": 0.2578125, "learning_rate": 2.3319774230985284e-07, "loss": 0.003825591644272208, "memory(GiB)": 112.53, "reward": 1.5729166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5729166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4579, "train_speed(iter/s)": 0.027177 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 297.0, "completions/mean_length": 107.47917175292969, "completions/min_length": 44.25, "epoch": 6.825018615040953, "grad_norm": 0.003346014542405333, "kl": 0.27294921875, "learning_rate": 2.3299776070916516e-07, "loss": 0.00027310423320159316, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4580, "train_speed(iter/s)": 0.027177 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 309.75, "completions/mean_length": 108.26041793823242, "completions/min_length": 43.75, "epoch": 6.8265078183172, "grad_norm": 0.0029654303140078165, "kl": 0.263671875, "learning_rate": 2.327978388418313e-07, "loss": 0.00026315110153518617, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4581, "train_speed(iter/s)": 0.027178 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 315.0, "completions/mean_length": 111.46875, "completions/min_length": 38.25, "epoch": 6.827997021593448, "grad_norm": 0.003287530940954993, "kl": 0.265380859375, "learning_rate": 2.325979767525773e-07, "loss": 0.00026586552849039435, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4582, "train_speed(iter/s)": 0.027178 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 284.0, "completions/mean_length": 118.73958778381348, "completions/min_length": 43.0, "epoch": 6.829486224869695, "grad_norm": 0.002899873024092153, "kl": 0.24853515625, "learning_rate": 2.3239817448611616e-07, "loss": 0.0002486125740688294, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4583, "train_speed(iter/s)": 0.027179 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.75, "completions/mean_length": 97.40625381469727, "completions/min_length": 34.5, "epoch": 6.830975428145942, "grad_norm": 0.007116553208457887, "kl": 0.28515625, "learning_rate": 2.3219843208714717e-07, "loss": 0.0002846564457286149, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4584, "train_speed(iter/s)": 0.027179 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 260.75, "completions/mean_length": 119.73958587646484, "completions/min_length": 50.25, "epoch": 6.832464631422189, "grad_norm": 0.5070772080553861, "kl": 0.238037109375, "learning_rate": 2.319987496003563e-07, "loss": -0.021243944764137268, "memory(GiB)": 112.53, "reward": 1.8125000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000074505806, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4585, "train_speed(iter/s)": 0.02718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 296.0, "completions/mean_length": 110.04166984558105, "completions/min_length": 42.5, "epoch": 6.833953834698436, "grad_norm": 0.0032152558606393005, "kl": 0.27001953125, "learning_rate": 2.3179912707041666e-07, "loss": 0.00026961578987538815, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4586, "train_speed(iter/s)": 0.027179 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 233.25, "completions/mean_length": 100.70833396911621, "completions/min_length": 35.5, "epoch": 6.8354430379746836, "grad_norm": 0.0034529148643798664, "kl": 0.29638671875, "learning_rate": 2.3159956454198726e-07, "loss": 0.0002968918124679476, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4587, "train_speed(iter/s)": 0.027177 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 247.25, "completions/mean_length": 104.07291984558105, "completions/min_length": 37.75, "epoch": 6.836932241250931, "grad_norm": 0.0030194723748125426, "kl": 0.265380859375, "learning_rate": 2.314000620597139e-07, "loss": 0.00026558441459201276, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4588, "train_speed(iter/s)": 0.027176 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.25, "completions/mean_length": 98.71875190734863, "completions/min_length": 42.25, "epoch": 6.838421444527178, "grad_norm": 2.4252751425297463, "kl": 0.28369140625, "learning_rate": 2.3120061966822914e-07, "loss": -0.024727752432227135, "memory(GiB)": 112.53, "reward": 1.791666716337204, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7916666865348816, "rewards/CineAccuracyORM/std": 0.33594508469104767, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4589, "train_speed(iter/s)": 0.027178 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 237.5, "completions/mean_length": 91.47916984558105, "completions/min_length": 33.5, "epoch": 6.839910647803425, "grad_norm": 0.004630663066164191, "kl": 0.3046875, "learning_rate": 2.310012374121517e-07, "loss": 0.0003047719656024128, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4590, "train_speed(iter/s)": 0.027178 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 233.5, "completions/mean_length": 96.95833587646484, "completions/min_length": 49.5, "epoch": 6.8413998510796725, "grad_norm": 0.0030348600843253257, "kl": 0.255126953125, "learning_rate": 2.3080191533608718e-07, "loss": 0.00025461800396442413, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4591, "train_speed(iter/s)": 0.027178 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 295.5, "completions/mean_length": 108.34375381469727, "completions/min_length": 41.75, "epoch": 6.842889054355919, "grad_norm": 0.004831719486602354, "kl": 0.27099609375, "learning_rate": 2.3060265348462776e-07, "loss": 0.0002709978725761175, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4592, "train_speed(iter/s)": 0.027177 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 269.25, "completions/mean_length": 114.63541793823242, "completions/min_length": 37.0, "epoch": 6.844378257632167, "grad_norm": 0.006455294625213169, "kl": 0.258056640625, "learning_rate": 2.30403451902352e-07, "loss": 0.00025754969101399183, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4593, "train_speed(iter/s)": 0.027175 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 250.5, "completions/mean_length": 105.31250190734863, "completions/min_length": 42.75, "epoch": 6.845867460908414, "grad_norm": 0.00323365789583109, "kl": 0.2666015625, "learning_rate": 2.3020431063382483e-07, "loss": 0.0002665598294697702, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4594, "train_speed(iter/s)": 0.027176 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 327.25, "completions/mean_length": 108.46875381469727, "completions/min_length": 44.75, "epoch": 6.8473566641846615, "grad_norm": 1.2644816912787793, "kl": 0.27685546875, "learning_rate": 2.3000522972359797e-07, "loss": -0.009672986343502998, "memory(GiB)": 112.53, "reward": 1.822916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4595, "train_speed(iter/s)": 0.027177 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.75, "completions/mean_length": 99.71875381469727, "completions/min_length": 38.25, "epoch": 6.848845867460908, "grad_norm": 0.0029637068369057595, "kl": 0.28369140625, "learning_rate": 2.2980620921620935e-07, "loss": 0.00028305751038715243, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4596, "train_speed(iter/s)": 0.027178 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 235.25, "completions/mean_length": 104.57291793823242, "completions/min_length": 44.0, "epoch": 6.850335070737156, "grad_norm": 0.0029025151450356645, "kl": 0.24853515625, "learning_rate": 2.296072491561838e-07, "loss": 0.00024830561596900225, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4597, "train_speed(iter/s)": 0.027178 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 247.0, "completions/mean_length": 106.10416984558105, "completions/min_length": 48.75, "epoch": 6.851824274013403, "grad_norm": 0.003270731743886884, "kl": 0.27294921875, "learning_rate": 2.2940834958803222e-07, "loss": 0.0002724849327933043, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4598, "train_speed(iter/s)": 0.027178 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.0, "completions/mean_length": 98.51041984558105, "completions/min_length": 38.5, "epoch": 6.85331347728965, "grad_norm": 0.046001244370862546, "kl": 0.294921875, "learning_rate": 2.2920951055625204e-07, "loss": 0.00029515987262129784, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4599, "train_speed(iter/s)": 0.027177 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 283.75, "completions/mean_length": 94.67708396911621, "completions/min_length": 43.5, "epoch": 6.854802680565897, "grad_norm": 0.621393113099907, "kl": 0.46337890625, "learning_rate": 2.2901073210532767e-07, "loss": 0.013700539246201515, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4600, "train_speed(iter/s)": 0.027179 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 267.75, "completions/mean_length": 102.51041984558105, "completions/min_length": 41.25, "epoch": 6.856291883842145, "grad_norm": 0.0031616846430556687, "kl": 0.2880859375, "learning_rate": 2.288120142797289e-07, "loss": 0.00028841468156315386, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4601, "train_speed(iter/s)": 0.027179 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 282.0, "completions/mean_length": 110.80208587646484, "completions/min_length": 45.25, "epoch": 6.857781087118392, "grad_norm": 0.0029080009732103864, "kl": 0.262939453125, "learning_rate": 2.2861335712391328e-07, "loss": 0.0002625546185299754, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4602, "train_speed(iter/s)": 0.027179 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 196.25, "completions/mean_length": 95.52083587646484, "completions/min_length": 33.0, "epoch": 6.8592702903946385, "grad_norm": 0.0031784544790074863, "kl": 0.28466796875, "learning_rate": 2.284147606823234e-07, "loss": 0.00028485164511948824, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4603, "train_speed(iter/s)": 0.02718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 248.25, "completions/mean_length": 102.96875381469727, "completions/min_length": 40.25, "epoch": 6.860759493670886, "grad_norm": 0.0029657759919861715, "kl": 0.291015625, "learning_rate": 2.2821622499938948e-07, "loss": 0.00029029184952378273, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4604, "train_speed(iter/s)": 0.02718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.75, "completions/mean_length": 105.43750381469727, "completions/min_length": 36.0, "epoch": 6.862248696947133, "grad_norm": 0.003576272971711444, "kl": 0.27294921875, "learning_rate": 2.2801775011952746e-07, "loss": 0.0002723265206441283, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4605, "train_speed(iter/s)": 0.027181 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 275.0, "completions/mean_length": 118.47916793823242, "completions/min_length": 34.5, "epoch": 6.863737900223381, "grad_norm": 1.1490744939492552, "kl": 0.259765625, "learning_rate": 2.2781933608713966e-07, "loss": 0.01721193641424179, "memory(GiB)": 112.53, "reward": 1.6145833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6145833432674408, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4606, "train_speed(iter/s)": 0.027181 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 206.0, "completions/mean_length": 96.80208587646484, "completions/min_length": 34.25, "epoch": 6.8652271034996275, "grad_norm": 0.05283334353267836, "kl": 0.3017578125, "learning_rate": 2.2762098294661553e-07, "loss": 0.0003014601825270802, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4607, "train_speed(iter/s)": 0.02718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.5, "completions/mean_length": 105.75000190734863, "completions/min_length": 44.75, "epoch": 6.866716306775875, "grad_norm": 0.9534169259187719, "kl": 0.26220703125, "learning_rate": 2.2742269074232967e-07, "loss": 0.007669870741665363, "memory(GiB)": 112.53, "reward": 1.822916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4608, "train_speed(iter/s)": 0.027179 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 235.0, "completions/mean_length": 96.48958587646484, "completions/min_length": 37.25, "epoch": 6.868205510052122, "grad_norm": 0.0029987495337288374, "kl": 0.29638671875, "learning_rate": 2.2722445951864422e-07, "loss": 0.00029653351521119475, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4609, "train_speed(iter/s)": 0.027178 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 234.25, "completions/mean_length": 102.38541984558105, "completions/min_length": 46.75, "epoch": 6.86969471332837, "grad_norm": 0.0031458164561668574, "kl": 0.2900390625, "learning_rate": 2.2702628931990698e-07, "loss": 0.0002900294493883848, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4610, "train_speed(iter/s)": 0.027176 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 268.25, "completions/mean_length": 98.46875190734863, "completions/min_length": 42.75, "epoch": 6.8711839166046165, "grad_norm": 0.8615613140049123, "kl": 0.26220703125, "learning_rate": 2.268281801904522e-07, "loss": 0.0026307920925319195, "memory(GiB)": 112.53, "reward": 1.8958333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4611, "train_speed(iter/s)": 0.027178 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 202.5, "completions/mean_length": 95.50000381469727, "completions/min_length": 44.75, "epoch": 6.872673119880863, "grad_norm": 0.0033321887193742297, "kl": 0.26611328125, "learning_rate": 2.2663013217460103e-07, "loss": 0.0002663502818904817, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4612, "train_speed(iter/s)": 0.02718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.0, "completions/mean_length": 94.40625381469727, "completions/min_length": 34.25, "epoch": 6.874162323157111, "grad_norm": 0.005499348821179902, "kl": 0.296875, "learning_rate": 2.2643214531665977e-07, "loss": 0.00029721122700721025, "memory(GiB)": 112.53, "reward": 1.5833333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4613, "train_speed(iter/s)": 0.027182 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 263.25, "completions/mean_length": 101.52083778381348, "completions/min_length": 33.0, "epoch": 6.875651526433359, "grad_norm": 0.0029156682806217526, "kl": 0.25244140625, "learning_rate": 2.2623421966092233e-07, "loss": 0.0002527990145608783, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4614, "train_speed(iter/s)": 0.027182 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 258.25, "completions/mean_length": 102.33333587646484, "completions/min_length": 32.75, "epoch": 6.877140729709605, "grad_norm": 0.12487271749589902, "kl": 0.341796875, "learning_rate": 2.2603635525166814e-07, "loss": 0.0003418118867557496, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4615, "train_speed(iter/s)": 0.027179 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 244.75, "completions/mean_length": 101.16666984558105, "completions/min_length": 43.25, "epoch": 6.878629932985852, "grad_norm": 1.7766435595500691, "kl": 0.289794921875, "learning_rate": 2.2583855213316322e-07, "loss": -0.0050519127398729324, "memory(GiB)": 112.53, "reward": 1.7187500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500074505806, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4616, "train_speed(iter/s)": 0.027179 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 241.25, "completions/mean_length": 98.27083587646484, "completions/min_length": 37.25, "epoch": 6.8801191362621, "grad_norm": 0.7657772822164675, "kl": 0.277099609375, "learning_rate": 2.2564081034965965e-07, "loss": -0.0026014968752861023, "memory(GiB)": 112.53, "reward": 1.8958333432674408, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.12590253353118896, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4617, "train_speed(iter/s)": 0.027181 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 235.25, "completions/mean_length": 103.80208396911621, "completions/min_length": 43.75, "epoch": 6.881608339538347, "grad_norm": 1.6323214929487604, "kl": 0.273193359375, "learning_rate": 2.2544312994539587e-07, "loss": -0.0017486249562352896, "memory(GiB)": 112.53, "reward": 1.8645833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8645833432674408, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4618, "train_speed(iter/s)": 0.027178 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.5, "completions/mean_length": 104.66666984558105, "completions/min_length": 37.5, "epoch": 6.883097542814594, "grad_norm": 1.837038077712934, "kl": 0.28515625, "learning_rate": 2.25245510964597e-07, "loss": -0.005769406445324421, "memory(GiB)": 112.53, "reward": 1.7812500298023224, "reward_std": 0.07400632463395596, "rewards/CineAccuracyORM/mean": 0.7812500149011612, "rewards/CineAccuracyORM/std": 0.33919306844472885, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4619, "train_speed(iter/s)": 0.027179 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 201.0, "completions/mean_length": 100.90625190734863, "completions/min_length": 33.0, "epoch": 6.884586746090841, "grad_norm": 0.0038284866378848555, "kl": 0.2822265625, "learning_rate": 2.250479534514738e-07, "loss": 0.0002819345099851489, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4620, "train_speed(iter/s)": 0.027179 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 232.5, "completions/mean_length": 98.45833587646484, "completions/min_length": 36.25, "epoch": 6.886075949367089, "grad_norm": 0.003490315771800439, "kl": 0.29150390625, "learning_rate": 2.2485045745022368e-07, "loss": 0.0002918036188930273, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4621, "train_speed(iter/s)": 0.027179 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 264.5, "completions/mean_length": 98.39583587646484, "completions/min_length": 34.75, "epoch": 6.887565152643336, "grad_norm": 0.0030998930061826406, "kl": 0.2919921875, "learning_rate": 2.2465302300503008e-07, "loss": 0.0002919561229646206, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4622, "train_speed(iter/s)": 0.027178 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.25, "completions/mean_length": 89.14583587646484, "completions/min_length": 34.75, "epoch": 6.889054355919583, "grad_norm": 0.004125254025623238, "kl": 0.31591796875, "learning_rate": 2.2445565016006263e-07, "loss": 0.0003158121835440397, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4623, "train_speed(iter/s)": 0.027179 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 202.0, "completions/mean_length": 94.84375190734863, "completions/min_length": 36.25, "epoch": 6.89054355919583, "grad_norm": 0.0033235362101184792, "kl": 0.2919921875, "learning_rate": 2.2425833895947755e-07, "loss": 0.0002915312070399523, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4624, "train_speed(iter/s)": 0.027177 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 259.0, "completions/mean_length": 97.45833587646484, "completions/min_length": 39.0, "epoch": 6.892032762472077, "grad_norm": 1.453445767249244, "kl": 0.29248046875, "learning_rate": 2.2406108944741693e-07, "loss": -0.002462539589032531, "memory(GiB)": 112.53, "reward": 1.7604166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4625, "train_speed(iter/s)": 0.027179 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.25, "completions/mean_length": 92.19791984558105, "completions/min_length": 36.0, "epoch": 6.893521965748325, "grad_norm": 0.002932807402141931, "kl": 0.30615234375, "learning_rate": 2.238639016680091e-07, "loss": 0.00030525302281603217, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4626, "train_speed(iter/s)": 0.02718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.0, "completions/mean_length": 86.55208587646484, "completions/min_length": 29.25, "epoch": 6.895011169024572, "grad_norm": 0.003284141980857358, "kl": 0.3212890625, "learning_rate": 2.2366677566536862e-07, "loss": 0.0003214605385437608, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4627, "train_speed(iter/s)": 0.027181 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.75, "completions/mean_length": 99.48958587646484, "completions/min_length": 39.25, "epoch": 6.896500372300819, "grad_norm": 0.002863096768648751, "kl": 0.270751953125, "learning_rate": 2.2346971148359627e-07, "loss": 0.00027080203290097415, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4628, "train_speed(iter/s)": 0.027183 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 293.0, "completions/mean_length": 102.08333587646484, "completions/min_length": 35.0, "epoch": 6.897989575577066, "grad_norm": 0.0520215044204667, "kl": 0.2998046875, "learning_rate": 2.2327270916677882e-07, "loss": 0.0002993516391143203, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4629, "train_speed(iter/s)": 0.027184 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 237.5, "completions/mean_length": 97.91666793823242, "completions/min_length": 47.5, "epoch": 6.899478778853314, "grad_norm": 0.0033131953991438344, "kl": 0.28857421875, "learning_rate": 2.230757687589896e-07, "loss": 0.00028885016217827797, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4630, "train_speed(iter/s)": 0.027185 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.75, "completions/mean_length": 89.02083587646484, "completions/min_length": 38.5, "epoch": 6.90096798212956, "grad_norm": 0.003535438106287881, "kl": 0.32421875, "learning_rate": 2.2287889030428768e-07, "loss": 0.00032445916440337896, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4631, "train_speed(iter/s)": 0.027186 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 248.25, "completions/mean_length": 101.95833587646484, "completions/min_length": 43.75, "epoch": 6.902457185405808, "grad_norm": 1.6004084358202169, "kl": 0.260986328125, "learning_rate": 2.2268207384671844e-07, "loss": -0.01866208203136921, "memory(GiB)": 112.53, "reward": 1.6145834028720856, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6145833507180214, "rewards/CineAccuracyORM/std": 0.464870348572731, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4632, "train_speed(iter/s)": 0.027186 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.75, "completions/mean_length": 89.94791984558105, "completions/min_length": 40.5, "epoch": 6.903946388682055, "grad_norm": 1.4071445836202585, "kl": 0.3193359375, "learning_rate": 2.224853194303134e-07, "loss": 0.0010833375854417682, "memory(GiB)": 112.53, "reward": 1.6354167461395264, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6354166865348816, "rewards/CineAccuracyORM/std": 0.4884019047021866, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4633, "train_speed(iter/s)": 0.027187 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 199.75, "completions/mean_length": 93.84375381469727, "completions/min_length": 44.0, "epoch": 6.905435591958303, "grad_norm": 0.0034311382964597576, "kl": 0.291015625, "learning_rate": 2.2228862709908997e-07, "loss": 0.00029051274759694934, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4634, "train_speed(iter/s)": 0.027186 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 223.25, "completions/mean_length": 104.62500190734863, "completions/min_length": 43.25, "epoch": 6.906924795234549, "grad_norm": 0.003085791758434625, "kl": 0.246337890625, "learning_rate": 2.2209199689705221e-07, "loss": 0.0002466284786351025, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4635, "train_speed(iter/s)": 0.027187 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 255.75, "completions/mean_length": 96.51041984558105, "completions/min_length": 36.25, "epoch": 6.908413998510797, "grad_norm": 0.003251359609551996, "kl": 0.285400390625, "learning_rate": 2.218954288681898e-07, "loss": 0.00028517632745206356, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4636, "train_speed(iter/s)": 0.027187 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 265.5, "completions/mean_length": 102.19791793823242, "completions/min_length": 40.25, "epoch": 6.909903201787044, "grad_norm": 0.030706051715622715, "kl": 0.303466796875, "learning_rate": 2.2169892305647864e-07, "loss": 0.00030300376238301396, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4637, "train_speed(iter/s)": 0.027186 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 229.25, "completions/mean_length": 94.63541984558105, "completions/min_length": 40.75, "epoch": 6.911392405063291, "grad_norm": 0.00316952379131796, "kl": 0.27734375, "learning_rate": 2.215024795058807e-07, "loss": 0.0002774283057078719, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4638, "train_speed(iter/s)": 0.027185 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.0, "completions/mean_length": 95.86458587646484, "completions/min_length": 35.0, "epoch": 6.912881608339538, "grad_norm": 1.13952639689659, "kl": 0.262939453125, "learning_rate": 2.213060982603439e-07, "loss": -0.005120773799717426, "memory(GiB)": 112.53, "reward": 1.9583333432674408, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.9583333432674408, "rewards/CineAccuracyORM/std": 0.09517337381839752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4639, "train_speed(iter/s)": 0.027185 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 214.0, "completions/mean_length": 93.02083587646484, "completions/min_length": 38.0, "epoch": 6.914370811615786, "grad_norm": 0.0033555283915180983, "kl": 0.29443359375, "learning_rate": 2.2110977936380287e-07, "loss": 0.0002944706939160824, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4640, "train_speed(iter/s)": 0.027185 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.25, "completions/mean_length": 95.43750190734863, "completions/min_length": 26.5, "epoch": 6.915860014892033, "grad_norm": 0.0031017837003329877, "kl": 0.274169921875, "learning_rate": 2.2091352286017713e-07, "loss": 0.0002743355871643871, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4641, "train_speed(iter/s)": 0.027185 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 206.0, "completions/mean_length": 98.10416984558105, "completions/min_length": 39.5, "epoch": 6.91734921816828, "grad_norm": 1.5117326876212502, "kl": 0.302978515625, "learning_rate": 2.207173287933734e-07, "loss": -0.010330811142921448, "memory(GiB)": 112.53, "reward": 1.6979167461395264, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6979166865348816, "rewards/CineAccuracyORM/std": 0.464870348572731, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4642, "train_speed(iter/s)": 0.027185 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.0, "completions/mean_length": 88.53125190734863, "completions/min_length": 41.5, "epoch": 6.918838421444527, "grad_norm": 0.0030822839958561435, "kl": 0.31201171875, "learning_rate": 2.2052119720728373e-07, "loss": 0.0003119840403087437, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4643, "train_speed(iter/s)": 0.027186 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 236.0, "completions/mean_length": 97.02083587646484, "completions/min_length": 40.0, "epoch": 6.920327624720774, "grad_norm": 0.003221626830478753, "kl": 0.26806640625, "learning_rate": 2.203251281457863e-07, "loss": 0.00026854954194277525, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4644, "train_speed(iter/s)": 0.027187 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 201.0, "completions/mean_length": 96.42708587646484, "completions/min_length": 39.75, "epoch": 6.921816827997022, "grad_norm": 0.007771859822866467, "kl": 0.26611328125, "learning_rate": 2.2012912165274584e-07, "loss": 0.00026571028865873814, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4645, "train_speed(iter/s)": 0.027186 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.0, "completions/mean_length": 90.40625190734863, "completions/min_length": 40.0, "epoch": 6.923306031273269, "grad_norm": 0.003621899370479329, "kl": 0.29931640625, "learning_rate": 2.1993317777201192e-07, "loss": 0.0002992811205331236, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4646, "train_speed(iter/s)": 0.027187 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 264.5, "completions/mean_length": 95.16666793823242, "completions/min_length": 42.5, "epoch": 6.924795234549516, "grad_norm": 1.0313620073361147, "kl": 0.27978515625, "learning_rate": 2.1973729654742145e-07, "loss": -0.0008532420615665615, "memory(GiB)": 112.53, "reward": 1.8958333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4647, "train_speed(iter/s)": 0.027185 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.0, "completions/mean_length": 90.63541984558105, "completions/min_length": 34.5, "epoch": 6.926284437825763, "grad_norm": 0.0032779824283394386, "kl": 0.30517578125, "learning_rate": 2.1954147802279644e-07, "loss": 0.0003050102386623621, "memory(GiB)": 112.53, "reward": 1.4166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.4166666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4648, "train_speed(iter/s)": 0.027186 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.0, "completions/mean_length": 96.43750190734863, "completions/min_length": 39.75, "epoch": 6.927773641102011, "grad_norm": 0.003123228096587939, "kl": 0.28076171875, "learning_rate": 2.1934572224194496e-07, "loss": 0.00028064544312655926, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4649, "train_speed(iter/s)": 0.027185 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 231.25, "completions/mean_length": 100.17708396911621, "completions/min_length": 38.25, "epoch": 6.929262844378258, "grad_norm": 0.0034116692961447576, "kl": 0.2802734375, "learning_rate": 2.1915002924866177e-07, "loss": 0.00028024017228744924, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4650, "train_speed(iter/s)": 0.027184 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.75, "completions/mean_length": 88.90625381469727, "completions/min_length": 33.75, "epoch": 6.930752047654504, "grad_norm": 0.003140354072144481, "kl": 0.291259765625, "learning_rate": 2.1895439908672626e-07, "loss": 0.0002911021001636982, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4651, "train_speed(iter/s)": 0.027181 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.0, "completions/mean_length": 103.22916793823242, "completions/min_length": 46.75, "epoch": 6.932241250930752, "grad_norm": 0.007427375781155328, "kl": 0.26904296875, "learning_rate": 2.1875883179990512e-07, "loss": 0.00026866470579989254, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4652, "train_speed(iter/s)": 0.02718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.75, "completions/mean_length": 93.16666984558105, "completions/min_length": 41.25, "epoch": 6.933730454207, "grad_norm": 0.0033267363211777665, "kl": 0.29052734375, "learning_rate": 2.1856332743195016e-07, "loss": 0.00029015878681093454, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4653, "train_speed(iter/s)": 0.02718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 204.25, "completions/mean_length": 85.67708778381348, "completions/min_length": 28.5, "epoch": 6.9352196574832465, "grad_norm": 0.0033106712815630184, "kl": 0.3193359375, "learning_rate": 2.183678860265994e-07, "loss": 0.00031937676249071956, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4654, "train_speed(iter/s)": 0.027182 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 250.75, "completions/mean_length": 104.14583587646484, "completions/min_length": 34.25, "epoch": 6.936708860759493, "grad_norm": 0.0029079188688672054, "kl": 0.27294921875, "learning_rate": 2.1817250762757655e-07, "loss": 0.00027336610946804285, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4655, "train_speed(iter/s)": 0.027181 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 273.5, "completions/mean_length": 103.68750381469727, "completions/min_length": 45.5, "epoch": 6.938198064035741, "grad_norm": 0.003088082153845271, "kl": 0.26904296875, "learning_rate": 2.1797719227859141e-07, "loss": 0.0002688986132852733, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4656, "train_speed(iter/s)": 0.027181 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 185.5, "completions/mean_length": 89.12500190734863, "completions/min_length": 37.25, "epoch": 6.939687267311988, "grad_norm": 1.357116375783888, "kl": 0.3203125, "learning_rate": 2.1778194002333984e-07, "loss": -0.011903139762580395, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4657, "train_speed(iter/s)": 0.027178 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.75, "completions/mean_length": 96.14583778381348, "completions/min_length": 43.25, "epoch": 6.9411764705882355, "grad_norm": 0.915066905683049, "kl": 0.2919921875, "learning_rate": 2.1758675090550328e-07, "loss": 0.025061212480068207, "memory(GiB)": 112.53, "reward": 1.697916716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.697916679084301, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4658, "train_speed(iter/s)": 0.027179 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.75, "completions/mean_length": 98.11458587646484, "completions/min_length": 32.25, "epoch": 6.942665673864482, "grad_norm": 0.003259930334660421, "kl": 0.276123046875, "learning_rate": 2.1739162496874918e-07, "loss": 0.00027617448358796537, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4659, "train_speed(iter/s)": 0.027179 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 248.25, "completions/mean_length": 96.77083396911621, "completions/min_length": 40.75, "epoch": 6.94415487714073, "grad_norm": 0.0032530417830124126, "kl": 0.29931640625, "learning_rate": 2.1719656225673077e-07, "loss": 0.0002997337142005563, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4660, "train_speed(iter/s)": 0.027179 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 214.75, "completions/mean_length": 90.98958587646484, "completions/min_length": 38.25, "epoch": 6.945644080416977, "grad_norm": 0.0032383040362885993, "kl": 0.2763671875, "learning_rate": 2.1700156281308707e-07, "loss": 0.00027670207782648504, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4661, "train_speed(iter/s)": 0.027179 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 169.75, "completions/mean_length": 84.25000381469727, "completions/min_length": 30.75, "epoch": 6.9471332836932245, "grad_norm": 0.0037505395530256073, "kl": 0.30517578125, "learning_rate": 2.168066266814434e-07, "loss": 0.00030509126372635365, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4662, "train_speed(iter/s)": 0.027181 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.25, "completions/mean_length": 89.89583587646484, "completions/min_length": 38.25, "epoch": 6.948622486969471, "grad_norm": 0.003610954796146964, "kl": 0.30908203125, "learning_rate": 2.1661175390541042e-07, "loss": 0.00030896507087163627, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4663, "train_speed(iter/s)": 0.027182 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 194.75, "completions/mean_length": 87.75000190734863, "completions/min_length": 41.25, "epoch": 6.950111690245718, "grad_norm": 0.0032710508917191848, "kl": 0.28759765625, "learning_rate": 2.1641694452858483e-07, "loss": 0.00028805132023990154, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4664, "train_speed(iter/s)": 0.027181 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.5, "completions/mean_length": 82.88541984558105, "completions/min_length": 27.25, "epoch": 6.951600893521966, "grad_norm": 0.0042433148842961265, "kl": 0.3173828125, "learning_rate": 2.16222198594549e-07, "loss": 0.0003172781434841454, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4665, "train_speed(iter/s)": 0.027183 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.75, "completions/mean_length": 96.27083587646484, "completions/min_length": 40.0, "epoch": 6.953090096798213, "grad_norm": 0.0038100121727741403, "kl": 0.28564453125, "learning_rate": 2.160275161468713e-07, "loss": 0.00028568279230967164, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4666, "train_speed(iter/s)": 0.027183 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 225.25, "completions/mean_length": 95.98958587646484, "completions/min_length": 42.75, "epoch": 6.95457930007446, "grad_norm": 0.0029822986347230553, "kl": 0.26953125, "learning_rate": 2.1583289722910557e-07, "loss": 0.0002691830159164965, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4667, "train_speed(iter/s)": 0.027182 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.0, "completions/mean_length": 89.16666793823242, "completions/min_length": 38.5, "epoch": 6.956068503350707, "grad_norm": 0.003778229216203245, "kl": 0.314453125, "learning_rate": 2.1563834188479207e-07, "loss": 0.0003146918024867773, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4668, "train_speed(iter/s)": 0.027181 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 220.5, "completions/mean_length": 97.71875190734863, "completions/min_length": 38.5, "epoch": 6.957557706626955, "grad_norm": 0.003792195899032487, "kl": 0.27783203125, "learning_rate": 2.154438501574562e-07, "loss": 0.00027770380256697536, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4669, "train_speed(iter/s)": 0.02718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 225.5, "completions/mean_length": 98.84375381469727, "completions/min_length": 37.5, "epoch": 6.9590469099032015, "grad_norm": 2.0005867739869174, "kl": 0.27099609375, "learning_rate": 2.152494220906094e-07, "loss": 0.00016903904906939715, "memory(GiB)": 112.53, "reward": 1.9895833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9895833432674408, "rewards/CineAccuracyORM/std": 0.05103103443980217, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4670, "train_speed(iter/s)": 0.027181 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 250.5, "completions/mean_length": 100.77083396911621, "completions/min_length": 42.0, "epoch": 6.960536113179449, "grad_norm": 0.0032947871533551706, "kl": 0.27099609375, "learning_rate": 2.150550577277489e-07, "loss": 0.0002713157155085355, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4671, "train_speed(iter/s)": 0.027178 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 241.5, "completions/mean_length": 102.73958396911621, "completions/min_length": 42.25, "epoch": 6.962025316455696, "grad_norm": 0.0034175276864755527, "kl": 0.27197265625, "learning_rate": 2.148607571123574e-07, "loss": 0.00027182858320884407, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4672, "train_speed(iter/s)": 0.027177 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 250.0, "completions/mean_length": 101.06250190734863, "completions/min_length": 40.25, "epoch": 6.963514519731944, "grad_norm": 0.003398252834397632, "kl": 0.279541015625, "learning_rate": 2.1466652028790383e-07, "loss": 0.0002792525920085609, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4673, "train_speed(iter/s)": 0.027177 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 186.0, "completions/mean_length": 82.38541984558105, "completions/min_length": 26.0, "epoch": 6.9650037230081905, "grad_norm": 0.0037208530488548456, "kl": 0.31494140625, "learning_rate": 2.1447234729784246e-07, "loss": 0.0003152758872602135, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4674, "train_speed(iter/s)": 0.027178 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 236.5, "completions/mean_length": 99.57291984558105, "completions/min_length": 35.0, "epoch": 6.966492926284438, "grad_norm": 0.003279453172144904, "kl": 0.28369140625, "learning_rate": 2.1427823818561335e-07, "loss": 0.00028374852263368666, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4675, "train_speed(iter/s)": 0.027179 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 251.5, "completions/mean_length": 93.12500190734863, "completions/min_length": 39.5, "epoch": 6.967982129560685, "grad_norm": 0.003573257728701717, "kl": 0.279296875, "learning_rate": 2.1408419299464242e-07, "loss": 0.00027946551563218236, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4676, "train_speed(iter/s)": 0.027181 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.0, "completions/mean_length": 95.41666984558105, "completions/min_length": 46.5, "epoch": 6.969471332836932, "grad_norm": 0.00324907462376156, "kl": 0.289306640625, "learning_rate": 2.1389021176834082e-07, "loss": 0.0002888857270590961, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4677, "train_speed(iter/s)": 0.027179 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.0, "completions/mean_length": 93.53125190734863, "completions/min_length": 42.75, "epoch": 6.9709605361131795, "grad_norm": 0.0033370601432477893, "kl": 0.288330078125, "learning_rate": 2.1369629455010647e-07, "loss": 0.0002881819964386523, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4678, "train_speed(iter/s)": 0.02718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 287.0, "completions/mean_length": 108.35416984558105, "completions/min_length": 36.5, "epoch": 6.972449739389427, "grad_norm": 0.002862466909687784, "kl": 0.27734375, "learning_rate": 2.135024413833214e-07, "loss": 0.0002769511775113642, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4679, "train_speed(iter/s)": 0.027176 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.25, "completions/mean_length": 87.92708587646484, "completions/min_length": 37.5, "epoch": 6.973938942665674, "grad_norm": 0.09022042349139683, "kl": 0.30908203125, "learning_rate": 2.1330865231135476e-07, "loss": 0.0003088581725023687, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4680, "train_speed(iter/s)": 0.027178 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.0, "completions/mean_length": 94.69791984558105, "completions/min_length": 30.0, "epoch": 6.975428145941921, "grad_norm": 0.0036109109609478887, "kl": 0.289306640625, "learning_rate": 2.1311492737756053e-07, "loss": 0.0002890514733735472, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4681, "train_speed(iter/s)": 0.027175 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.25, "completions/mean_length": 91.86458587646484, "completions/min_length": 34.0, "epoch": 6.976917349218168, "grad_norm": 1.0554442316339925, "kl": 0.262939453125, "learning_rate": 2.1292126662527844e-07, "loss": 0.0005662888288497925, "memory(GiB)": 112.53, "reward": 1.8437500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4682, "train_speed(iter/s)": 0.027175 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.0, "completions/mean_length": 86.16666984558105, "completions/min_length": 30.75, "epoch": 6.978406552494415, "grad_norm": 0.003277131067346275, "kl": 0.293701171875, "learning_rate": 2.127276700978345e-07, "loss": 0.0002936607343144715, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4683, "train_speed(iter/s)": 0.027177 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 196.0, "completions/mean_length": 85.10416984558105, "completions/min_length": 36.25, "epoch": 6.979895755770663, "grad_norm": 1.3934507437689028, "kl": 0.288330078125, "learning_rate": 2.1253413783853918e-07, "loss": 0.011104478500783443, "memory(GiB)": 112.53, "reward": 1.8645833432674408, "reward_std": 0.07400632463395596, "rewards/CineAccuracyORM/mean": 0.8645833432674408, "rewards/CineAccuracyORM/std": 0.21880721300840378, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4684, "train_speed(iter/s)": 0.027179 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 232.5, "completions/mean_length": 96.42708587646484, "completions/min_length": 28.25, "epoch": 6.98138495904691, "grad_norm": 0.0030942878738076766, "kl": 0.298828125, "learning_rate": 2.123406698906897e-07, "loss": 0.00029889371944591403, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4685, "train_speed(iter/s)": 0.027177 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 214.0, "completions/mean_length": 90.83333396911621, "completions/min_length": 40.25, "epoch": 6.982874162323157, "grad_norm": 0.003356055936793443, "kl": 0.2958984375, "learning_rate": 2.121472662975683e-07, "loss": 0.00029589925543405116, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4686, "train_speed(iter/s)": 0.027177 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 258.5, "completions/mean_length": 91.79166984558105, "completions/min_length": 39.5, "epoch": 6.984363365599404, "grad_norm": 0.003505389085531415, "kl": 0.2978515625, "learning_rate": 2.1195392710244275e-07, "loss": 0.00029793480644002557, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4687, "train_speed(iter/s)": 0.027177 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 196.25, "completions/mean_length": 93.80208587646484, "completions/min_length": 40.5, "epoch": 6.985852568875652, "grad_norm": 2.6523207930658548, "kl": 0.27197265625, "learning_rate": 2.117606523485672e-07, "loss": 0.0247479360550642, "memory(GiB)": 112.53, "reward": 1.791666716337204, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7916666865348816, "rewards/CineAccuracyORM/std": 0.33594508469104767, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4688, "train_speed(iter/s)": 0.027178 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 220.75, "completions/mean_length": 95.86458587646484, "completions/min_length": 35.75, "epoch": 6.987341772151899, "grad_norm": 0.966639460150507, "kl": 0.2734375, "learning_rate": 2.1156744207917997e-07, "loss": 0.0011173341190442443, "memory(GiB)": 112.53, "reward": 1.7187500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500149011612, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4689, "train_speed(iter/s)": 0.027178 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.25, "completions/mean_length": 80.98958587646484, "completions/min_length": 32.25, "epoch": 6.9888309754281455, "grad_norm": 0.0032594181858276792, "kl": 0.31103515625, "learning_rate": 2.1137429633750664e-07, "loss": 0.0003110365360043943, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4690, "train_speed(iter/s)": 0.027178 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.5, "completions/mean_length": 86.01041793823242, "completions/min_length": 40.25, "epoch": 6.990320178704393, "grad_norm": 1.2675243645047227, "kl": 0.3310546875, "learning_rate": 2.1118121516675668e-07, "loss": -0.00563793582841754, "memory(GiB)": 112.53, "reward": 1.7083333730697632, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.7083333507180214, "rewards/CineAccuracyORM/std": 0.27512967213988304, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4691, "train_speed(iter/s)": 0.02718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 185.75, "completions/mean_length": 92.70833778381348, "completions/min_length": 38.5, "epoch": 6.991809381980641, "grad_norm": 0.003314252439042493, "kl": 0.259521484375, "learning_rate": 2.109881986101265e-07, "loss": 0.00025929289404302835, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4692, "train_speed(iter/s)": 0.027181 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.75, "completions/mean_length": 90.09375, "completions/min_length": 33.25, "epoch": 6.993298585256888, "grad_norm": 0.002778638147212218, "kl": 0.27978515625, "learning_rate": 2.107952467107973e-07, "loss": 0.0002800020738504827, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4693, "train_speed(iter/s)": 0.02718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.0, "completions/mean_length": 94.68750190734863, "completions/min_length": 36.5, "epoch": 6.994787788533134, "grad_norm": 1.8634757420731174, "kl": 0.2890625, "learning_rate": 2.1060235951193578e-07, "loss": -0.011173698119819164, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4694, "train_speed(iter/s)": 0.027177 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.25, "completions/mean_length": 93.80208587646484, "completions/min_length": 38.5, "epoch": 6.996276991809382, "grad_norm": 0.0030827339376277817, "kl": 0.287109375, "learning_rate": 2.1040953705669502e-07, "loss": 0.00028756947722285986, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4695, "train_speed(iter/s)": 0.027174 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.25, "completions/mean_length": 94.83333396911621, "completions/min_length": 35.0, "epoch": 6.997766195085629, "grad_norm": 2.36638654954008, "kl": 0.29296875, "learning_rate": 2.1021677938821214e-07, "loss": 0.014573400840163231, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4154365845024586, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4696, "train_speed(iter/s)": 0.027173 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.75, "completions/mean_length": 89.96875190734863, "completions/min_length": 32.25, "epoch": 6.999255398361877, "grad_norm": 0.0035009234367084506, "kl": 0.30517578125, "learning_rate": 2.1002408654961125e-07, "loss": 0.0003044774930458516, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4697, "train_speed(iter/s)": 0.027173 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.75, "completions/mean_length": 83.79166793823242, "completions/min_length": 41.5, "epoch": 7.001489203276247, "grad_norm": 0.003955606719692922, "kl": 0.30712890625, "learning_rate": 2.098314585840011e-07, "loss": 0.0003064576012548059, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4698, "train_speed(iter/s)": 0.027173 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 231.5, "completions/mean_length": 97.31250381469727, "completions/min_length": 42.75, "epoch": 7.0029784065524945, "grad_norm": 0.0034024841879728733, "kl": 0.2783203125, "learning_rate": 2.0963889553447594e-07, "loss": 0.00027834775391966105, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4699, "train_speed(iter/s)": 0.027173 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.25, "completions/mean_length": 96.72916793823242, "completions/min_length": 44.0, "epoch": 7.004467609828741, "grad_norm": 0.0030964107899335734, "kl": 0.28662109375, "learning_rate": 2.0944639744411623e-07, "loss": 0.0002860647509805858, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4700, "train_speed(iter/s)": 0.027174 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.75, "completions/mean_length": 87.27083396911621, "completions/min_length": 37.25, "epoch": 7.005956813104989, "grad_norm": 0.0035247400319554445, "kl": 0.30322265625, "learning_rate": 2.0925396435598664e-07, "loss": 0.000302740401821211, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4701, "train_speed(iter/s)": 0.027174 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 238.5, "completions/mean_length": 90.93750190734863, "completions/min_length": 36.5, "epoch": 7.007446016381236, "grad_norm": 0.773751645613434, "kl": 0.28515625, "learning_rate": 2.0906159631313864e-07, "loss": -0.010284969583153725, "memory(GiB)": 112.53, "reward": 1.5937500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500149011612, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4702, "train_speed(iter/s)": 0.027174 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.5, "completions/mean_length": 80.19791984558105, "completions/min_length": 31.75, "epoch": 7.0089352196574835, "grad_norm": 0.0038292025085347597, "kl": 0.3466796875, "learning_rate": 2.0886929335860825e-07, "loss": 0.00034636491909623146, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4703, "train_speed(iter/s)": 0.027176 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 204.75, "completions/mean_length": 92.16666793823242, "completions/min_length": 45.0, "epoch": 7.01042442293373, "grad_norm": 0.0030948687778778973, "kl": 0.287109375, "learning_rate": 2.0867705553541725e-07, "loss": 0.0002871431934181601, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4704, "train_speed(iter/s)": 0.027175 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.0, "completions/mean_length": 81.28125381469727, "completions/min_length": 32.25, "epoch": 7.011913626209978, "grad_norm": 1.6353774408417983, "kl": 0.3173828125, "learning_rate": 2.0848488288657263e-07, "loss": -0.006702854298055172, "memory(GiB)": 112.53, "reward": 1.9583333432674408, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.9583333432674408, "rewards/CineAccuracyORM/std": 0.09517337381839752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4705, "train_speed(iter/s)": 0.027175 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.25, "completions/mean_length": 88.00000190734863, "completions/min_length": 34.5, "epoch": 7.013402829486225, "grad_norm": 0.0033703058096370858, "kl": 0.282470703125, "learning_rate": 2.0829277545506734e-07, "loss": 0.0002824773546308279, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4706, "train_speed(iter/s)": 0.027176 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 195.5, "completions/mean_length": 78.64583778381348, "completions/min_length": 36.5, "epoch": 7.014892032762472, "grad_norm": 1.0688410090080642, "kl": 0.93359375, "learning_rate": 2.0810073328387918e-07, "loss": 0.005225948058068752, "memory(GiB)": 112.53, "reward": 1.572916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.572916679084301, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4707, "train_speed(iter/s)": 0.027176 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 255.75, "completions/mean_length": 93.91666984558105, "completions/min_length": 42.5, "epoch": 7.016381236038719, "grad_norm": 0.003148937698358357, "kl": 0.290771484375, "learning_rate": 2.0790875641597155e-07, "loss": 0.00029073149198666215, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4708, "train_speed(iter/s)": 0.027175 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.5, "completions/mean_length": 86.63541984558105, "completions/min_length": 41.0, "epoch": 7.017870439314967, "grad_norm": 0.7298377204642633, "kl": 0.288818359375, "learning_rate": 2.0771684489429325e-07, "loss": -0.008374080993235111, "memory(GiB)": 112.53, "reward": 1.7604166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.760416679084301, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4709, "train_speed(iter/s)": 0.027176 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.0, "completions/mean_length": 84.86458587646484, "completions/min_length": 39.75, "epoch": 7.019359642591214, "grad_norm": 0.004038750303758664, "kl": 0.275390625, "learning_rate": 2.0752499876177825e-07, "loss": 0.0002755080349743366, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4710, "train_speed(iter/s)": 0.027178 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.75, "completions/mean_length": 87.77083587646484, "completions/min_length": 40.0, "epoch": 7.0208488458674605, "grad_norm": 0.0030666745507677614, "kl": 0.30615234375, "learning_rate": 2.0733321806134646e-07, "loss": 0.0003067667712457478, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4711, "train_speed(iter/s)": 0.027175 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 199.5, "completions/mean_length": 82.51041984558105, "completions/min_length": 38.25, "epoch": 7.022338049143708, "grad_norm": 0.003714868277154907, "kl": 0.3154296875, "learning_rate": 2.071415028359026e-07, "loss": 0.0003150872071273625, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4712, "train_speed(iter/s)": 0.027177 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 253.0, "completions/mean_length": 99.69791984558105, "completions/min_length": 41.75, "epoch": 7.023827252419955, "grad_norm": 0.003123906498293435, "kl": 0.26416015625, "learning_rate": 2.069498531283369e-07, "loss": 0.0002639701124280691, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4713, "train_speed(iter/s)": 0.027176 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.5, "completions/mean_length": 85.40625190734863, "completions/min_length": 42.75, "epoch": 7.025316455696203, "grad_norm": 0.0031448678275516817, "kl": 0.29443359375, "learning_rate": 2.06758268981525e-07, "loss": 0.00029423911473713815, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4714, "train_speed(iter/s)": 0.027176 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 240.0, "completions/mean_length": 91.84375381469727, "completions/min_length": 36.25, "epoch": 7.0268056589724495, "grad_norm": 0.0033670109275645093, "kl": 0.2998046875, "learning_rate": 2.0656675043832755e-07, "loss": 0.00030001328559592366, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4715, "train_speed(iter/s)": 0.027177 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.75, "completions/mean_length": 84.27083778381348, "completions/min_length": 38.0, "epoch": 7.028294862248697, "grad_norm": 0.004616890867593769, "kl": 0.273193359375, "learning_rate": 2.063752975415915e-07, "loss": 0.00027327699353918433, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4716, "train_speed(iter/s)": 0.027177 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 191.25, "completions/mean_length": 93.42708587646484, "completions/min_length": 45.0, "epoch": 7.029784065524944, "grad_norm": 0.0037050884221712112, "kl": 0.2958984375, "learning_rate": 2.0618391033414757e-07, "loss": 0.0002961561258416623, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4717, "train_speed(iter/s)": 0.027178 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.75, "completions/mean_length": 85.33333396911621, "completions/min_length": 40.25, "epoch": 7.031273268801192, "grad_norm": 0.003492068399742293, "kl": 0.3134765625, "learning_rate": 2.0599258885881316e-07, "loss": 0.0003136768937110901, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4718, "train_speed(iter/s)": 0.027175 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 194.5, "completions/mean_length": 87.70833587646484, "completions/min_length": 36.5, "epoch": 7.032762472077438, "grad_norm": 0.0034225194436534943, "kl": 0.30224609375, "learning_rate": 2.0580133315839036e-07, "loss": 0.0003016848349943757, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4719, "train_speed(iter/s)": 0.027176 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 186.25, "completions/mean_length": 86.37500381469727, "completions/min_length": 31.0, "epoch": 7.034251675353686, "grad_norm": 0.0033870048578500496, "kl": 0.2880859375, "learning_rate": 2.0561014327566633e-07, "loss": 0.00028792195371352136, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4720, "train_speed(iter/s)": 0.027178 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 237.75, "completions/mean_length": 99.75000190734863, "completions/min_length": 30.75, "epoch": 7.035740878629933, "grad_norm": 1.1051698562381398, "kl": 0.296875, "learning_rate": 2.0541901925341443e-07, "loss": -0.014487597160041332, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4721, "train_speed(iter/s)": 0.027178 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 191.75, "completions/mean_length": 93.05208587646484, "completions/min_length": 44.5, "epoch": 7.037230081906181, "grad_norm": 0.0034344485150646924, "kl": 0.283203125, "learning_rate": 2.0522796113439184e-07, "loss": 0.00028314959490671754, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4722, "train_speed(iter/s)": 0.027178 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.5, "completions/mean_length": 99.60416984558105, "completions/min_length": 43.5, "epoch": 7.038719285182427, "grad_norm": 0.0033243713173636254, "kl": 0.28271484375, "learning_rate": 2.0503696896134237e-07, "loss": 0.0002826341660693288, "memory(GiB)": 112.53, "reward": 1.416666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.416666679084301, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4723, "train_speed(iter/s)": 0.027178 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.75, "completions/mean_length": 83.75000190734863, "completions/min_length": 35.0, "epoch": 7.040208488458674, "grad_norm": 3.6972228647461187, "kl": 0.30859375, "learning_rate": 2.0484604277699436e-07, "loss": -0.0012081408640369773, "memory(GiB)": 112.53, "reward": 1.791666716337204, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7916666865348816, "rewards/CineAccuracyORM/std": 0.33594508469104767, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4724, "train_speed(iter/s)": 0.027179 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 250.5, "completions/mean_length": 91.84375381469727, "completions/min_length": 37.0, "epoch": 7.041697691734922, "grad_norm": 0.0029471737078997982, "kl": 0.30517578125, "learning_rate": 2.0465518262406134e-07, "loss": 0.0003051109961234033, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4725, "train_speed(iter/s)": 0.027181 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.5, "completions/mean_length": 93.03125381469727, "completions/min_length": 31.25, "epoch": 7.043186895011169, "grad_norm": 0.0031049169703698805, "kl": 0.28173828125, "learning_rate": 2.0446438854524279e-07, "loss": 0.0002815525222104043, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4726, "train_speed(iter/s)": 0.027181 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.5, "completions/mean_length": 87.43750190734863, "completions/min_length": 41.25, "epoch": 7.044676098287416, "grad_norm": 0.003432518347989255, "kl": 0.29443359375, "learning_rate": 2.042736605832222e-07, "loss": 0.0002947288448922336, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4727, "train_speed(iter/s)": 0.027179 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 201.0, "completions/mean_length": 83.53125381469727, "completions/min_length": 32.75, "epoch": 7.046165301563663, "grad_norm": 0.003036877409177236, "kl": 0.3134765625, "learning_rate": 2.040829987806697e-07, "loss": 0.0003134388825856149, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4728, "train_speed(iter/s)": 0.02718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 256.0, "completions/mean_length": 92.01041984558105, "completions/min_length": 45.0, "epoch": 7.047654504839911, "grad_norm": 0.04474224898434338, "kl": 0.31201171875, "learning_rate": 2.0389240318023905e-07, "loss": 0.00031146296532824636, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4729, "train_speed(iter/s)": 0.027178 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.5, "completions/mean_length": 88.18750190734863, "completions/min_length": 32.5, "epoch": 7.049143708116158, "grad_norm": 0.0030002313983511293, "kl": 0.294921875, "learning_rate": 2.0370187382457066e-07, "loss": 0.00029477724456228316, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4730, "train_speed(iter/s)": 0.027179 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 206.0, "completions/mean_length": 84.28125381469727, "completions/min_length": 41.5, "epoch": 7.050632911392405, "grad_norm": 0.0035198824951257, "kl": 0.3056640625, "learning_rate": 2.035114107562892e-07, "loss": 0.0003054390545003116, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4731, "train_speed(iter/s)": 0.027181 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 272.75, "completions/mean_length": 93.69791984558105, "completions/min_length": 37.5, "epoch": 7.052122114668652, "grad_norm": 0.004946042442443099, "kl": 0.28076171875, "learning_rate": 2.0332101401800471e-07, "loss": 0.00028034468414261937, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4732, "train_speed(iter/s)": 0.027181 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.25, "completions/mean_length": 81.72916984558105, "completions/min_length": 35.75, "epoch": 7.0536113179449, "grad_norm": 0.003589376747643163, "kl": 0.3056640625, "learning_rate": 2.03130683652313e-07, "loss": 0.00030580456950701773, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4733, "train_speed(iter/s)": 0.027181 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 251.75, "completions/mean_length": 94.15625381469727, "completions/min_length": 39.75, "epoch": 7.055100521221147, "grad_norm": 0.0034557862600038845, "kl": 0.28271484375, "learning_rate": 2.0294041970179372e-07, "loss": 0.00028345955070108175, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4734, "train_speed(iter/s)": 0.027181 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.75, "completions/mean_length": 92.09375381469727, "completions/min_length": 44.25, "epoch": 7.056589724497394, "grad_norm": 0.0040232540498588215, "kl": 0.29541015625, "learning_rate": 2.0275022220901305e-07, "loss": 0.00029503743280656636, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4735, "train_speed(iter/s)": 0.027183 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.25, "completions/mean_length": 94.39583396911621, "completions/min_length": 30.0, "epoch": 7.058078927773641, "grad_norm": 0.003948953178902251, "kl": 0.264892578125, "learning_rate": 2.0256009121652147e-07, "loss": 0.0002649100497364998, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4736, "train_speed(iter/s)": 0.027185 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 223.0, "completions/mean_length": 92.14583587646484, "completions/min_length": 40.5, "epoch": 7.059568131049888, "grad_norm": 0.0035738862532806457, "kl": 0.287109375, "learning_rate": 2.0237002676685465e-07, "loss": 0.000286882248474285, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4737, "train_speed(iter/s)": 0.027187 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 359.5, "completions/mean_length": 87.58333587646484, "completions/min_length": 38.75, "epoch": 7.061057334326136, "grad_norm": 1.781482841752141, "kl": 0.32177734375, "learning_rate": 2.0218002890253404e-07, "loss": -0.008034564554691315, "memory(GiB)": 112.53, "reward": 1.6458333730697632, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.6458333432674408, "rewards/CineAccuracyORM/std": 0.3712640330195427, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4738, "train_speed(iter/s)": 0.027185 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 179.25, "completions/mean_length": 79.84375190734863, "completions/min_length": 36.0, "epoch": 7.062546537602382, "grad_norm": 0.003953025620588577, "kl": 0.3310546875, "learning_rate": 2.0199009766606505e-07, "loss": 0.0003304056008346379, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4739, "train_speed(iter/s)": 0.027186 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.0, "completions/mean_length": 83.78125381469727, "completions/min_length": 29.0, "epoch": 7.06403574087863, "grad_norm": 0.003122914521331551, "kl": 0.32373046875, "learning_rate": 2.0180023309993932e-07, "loss": 0.0003239075595047325, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4740, "train_speed(iter/s)": 0.027186 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 223.75, "completions/mean_length": 99.61458778381348, "completions/min_length": 36.0, "epoch": 7.065524944154877, "grad_norm": 0.004198945744183189, "kl": 0.275390625, "learning_rate": 2.0161043524663284e-07, "loss": 0.0002756711619440466, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4741, "train_speed(iter/s)": 0.027186 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.5, "completions/mean_length": 86.33333587646484, "completions/min_length": 37.0, "epoch": 7.0670141474311245, "grad_norm": 0.0032244998031469322, "kl": 0.3193359375, "learning_rate": 2.01420704148607e-07, "loss": 0.0003195943427272141, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4742, "train_speed(iter/s)": 0.027184 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.75, "completions/mean_length": 96.46875190734863, "completions/min_length": 33.75, "epoch": 7.068503350707371, "grad_norm": 0.0033786423139800445, "kl": 0.296142578125, "learning_rate": 2.0123103984830807e-07, "loss": 0.0002962207072414458, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4743, "train_speed(iter/s)": 0.027183 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 289.25, "completions/mean_length": 99.16666984558105, "completions/min_length": 35.75, "epoch": 7.069992553983619, "grad_norm": 0.003122446251918162, "kl": 0.267578125, "learning_rate": 2.010414423881674e-07, "loss": 0.0002671128313522786, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4744, "train_speed(iter/s)": 0.027184 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.25, "completions/mean_length": 93.28125381469727, "completions/min_length": 44.25, "epoch": 7.071481757259866, "grad_norm": 0.0032272725893894272, "kl": 0.287841796875, "learning_rate": 2.0085191181060174e-07, "loss": 0.00028772963560186327, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4745, "train_speed(iter/s)": 0.027182 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.0, "completions/mean_length": 91.84375190734863, "completions/min_length": 38.75, "epoch": 7.0729709605361135, "grad_norm": 0.652370860367673, "kl": 0.31640625, "learning_rate": 2.0066244815801242e-07, "loss": 0.0016830484382808208, "memory(GiB)": 112.53, "reward": 1.9479166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.9479166716337204, "rewards/CineAccuracyORM/std": 0.1037127822637558, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4746, "train_speed(iter/s)": 0.027181 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.0, "completions/mean_length": 91.34375190734863, "completions/min_length": 41.25, "epoch": 7.07446016381236, "grad_norm": 0.003184169427158285, "kl": 0.26806640625, "learning_rate": 2.0047305147278605e-07, "loss": 0.0002682257036212832, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4747, "train_speed(iter/s)": 0.027181 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.25, "completions/mean_length": 96.04166984558105, "completions/min_length": 43.0, "epoch": 7.075949367088608, "grad_norm": 0.9969659383719236, "kl": 0.289306640625, "learning_rate": 2.0028372179729402e-07, "loss": -0.0002698766766116023, "memory(GiB)": 112.53, "reward": 1.8541666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.12590253353118896, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4748, "train_speed(iter/s)": 0.027179 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.5, "completions/mean_length": 85.61458396911621, "completions/min_length": 35.5, "epoch": 7.077438570364855, "grad_norm": 0.003433320318878873, "kl": 0.296875, "learning_rate": 2.000944591738929e-07, "loss": 0.00029660449945367873, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4749, "train_speed(iter/s)": 0.027177 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 250.75, "completions/mean_length": 96.16666984558105, "completions/min_length": 37.0, "epoch": 7.078927773641102, "grad_norm": 1.0261736219132487, "kl": 0.2783203125, "learning_rate": 1.999052636449245e-07, "loss": -0.01798151060938835, "memory(GiB)": 112.53, "reward": 1.5104166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5104166716337204, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4750, "train_speed(iter/s)": 0.027177 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 243.25, "completions/mean_length": 97.97916984558105, "completions/min_length": 35.75, "epoch": 7.080416976917349, "grad_norm": 0.0029133194284313825, "kl": 0.28125, "learning_rate": 1.997161352527152e-07, "loss": 0.00028123275842517614, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4751, "train_speed(iter/s)": 0.027174 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 273.25, "completions/mean_length": 102.22916984558105, "completions/min_length": 44.75, "epoch": 7.081906180193596, "grad_norm": 0.0029071537575104563, "kl": 0.261474609375, "learning_rate": 1.9952707403957657e-07, "loss": 0.00026154000079259276, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4752, "train_speed(iter/s)": 0.027172 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.0, "completions/mean_length": 100.33333587646484, "completions/min_length": 39.5, "epoch": 7.083395383469844, "grad_norm": 0.002975151677741317, "kl": 0.271484375, "learning_rate": 1.9933808004780506e-07, "loss": 0.00027111911913380027, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4753, "train_speed(iter/s)": 0.027169 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.5, "completions/mean_length": 94.32291793823242, "completions/min_length": 40.25, "epoch": 7.084884586746091, "grad_norm": 0.003244051539089688, "kl": 0.2822265625, "learning_rate": 1.9914915331968212e-07, "loss": 0.0002821548259817064, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4754, "train_speed(iter/s)": 0.027171 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.25, "completions/mean_length": 84.25000381469727, "completions/min_length": 38.75, "epoch": 7.086373790022338, "grad_norm": 0.9097009396948704, "kl": 0.328125, "learning_rate": 1.9896029389747408e-07, "loss": 0.0033448245376348495, "memory(GiB)": 112.53, "reward": 1.760416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166865348816, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4755, "train_speed(iter/s)": 0.027172 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.0, "completions/mean_length": 93.16666984558105, "completions/min_length": 41.75, "epoch": 7.087862993298585, "grad_norm": 0.0032439328263708933, "kl": 0.2978515625, "learning_rate": 1.9877150182343262e-07, "loss": 0.0002975022071041167, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4756, "train_speed(iter/s)": 0.027171 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 264.0, "completions/mean_length": 92.84375381469727, "completions/min_length": 34.75, "epoch": 7.089352196574833, "grad_norm": 0.003311005796295383, "kl": 0.2734375, "learning_rate": 1.9858277713979377e-07, "loss": 0.0002736157621257007, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4757, "train_speed(iter/s)": 0.027173 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 278.75, "completions/mean_length": 104.50000381469727, "completions/min_length": 43.0, "epoch": 7.0908413998510795, "grad_norm": 0.907828026815326, "kl": 0.27099609375, "learning_rate": 1.9839411988877857e-07, "loss": 0.0007049217820167542, "memory(GiB)": 112.53, "reward": 1.8854166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.20484384894371033, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4758, "train_speed(iter/s)": 0.027173 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.25, "completions/mean_length": 93.13541793823242, "completions/min_length": 32.25, "epoch": 7.092330603127327, "grad_norm": 0.0031226889563518954, "kl": 0.27978515625, "learning_rate": 1.9820553011259377e-07, "loss": 0.00027950326330028474, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4759, "train_speed(iter/s)": 0.027174 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.5, "completions/mean_length": 94.26041984558105, "completions/min_length": 32.5, "epoch": 7.093819806403574, "grad_norm": 0.0034112608930396897, "kl": 0.296875, "learning_rate": 1.9801700785342968e-07, "loss": 0.0002970803761854768, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4760, "train_speed(iter/s)": 0.027173 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 234.5, "completions/mean_length": 100.36458587646484, "completions/min_length": 29.75, "epoch": 7.095309009679822, "grad_norm": 0.0028293207678290614, "kl": 0.264404296875, "learning_rate": 1.978285531534627e-07, "loss": 0.0002642605686560273, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4761, "train_speed(iter/s)": 0.027173 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.5, "completions/mean_length": 96.09375381469727, "completions/min_length": 37.5, "epoch": 7.0967982129560685, "grad_norm": 0.018596341128546662, "kl": 0.2822265625, "learning_rate": 1.9764016605485352e-07, "loss": 0.00028216978535056114, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4762, "train_speed(iter/s)": 0.027172 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 196.0, "completions/mean_length": 92.88541984558105, "completions/min_length": 43.0, "epoch": 7.098287416232315, "grad_norm": 0.0032266076083338727, "kl": 0.28271484375, "learning_rate": 1.9745184659974762e-07, "loss": 0.00028271344490349293, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4763, "train_speed(iter/s)": 0.027173 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.75, "completions/mean_length": 85.66666984558105, "completions/min_length": 37.0, "epoch": 7.099776619508563, "grad_norm": 0.009169475394606587, "kl": 0.34716796875, "learning_rate": 1.9726359483027604e-07, "loss": 0.00034694524947553873, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4764, "train_speed(iter/s)": 0.027171 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 194.0, "completions/mean_length": 95.68750381469727, "completions/min_length": 41.0, "epoch": 7.10126582278481, "grad_norm": 0.0028694736238257675, "kl": 0.2919921875, "learning_rate": 1.9707541078855354e-07, "loss": 0.00029174299561418593, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4765, "train_speed(iter/s)": 0.02717 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.25, "completions/mean_length": 97.71875381469727, "completions/min_length": 38.5, "epoch": 7.1027550260610575, "grad_norm": 0.003887934168979598, "kl": 0.27880859375, "learning_rate": 1.9688729451668111e-07, "loss": 0.0002791447623167187, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4766, "train_speed(iter/s)": 0.027172 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.0, "completions/mean_length": 99.12500381469727, "completions/min_length": 42.75, "epoch": 7.104244229337304, "grad_norm": 0.003099960888470336, "kl": 0.290283203125, "learning_rate": 1.9669924605674316e-07, "loss": 0.0002897733938880265, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4767, "train_speed(iter/s)": 0.027173 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 237.75, "completions/mean_length": 103.85417175292969, "completions/min_length": 37.5, "epoch": 7.105733432613552, "grad_norm": 0.0032472381092819485, "kl": 0.270263671875, "learning_rate": 1.9651126545081003e-07, "loss": 0.0002702629426494241, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4768, "train_speed(iter/s)": 0.027172 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 238.5, "completions/mean_length": 101.11458587646484, "completions/min_length": 40.75, "epoch": 7.107222635889799, "grad_norm": 0.002998084802666298, "kl": 0.2705078125, "learning_rate": 1.9632335274093642e-07, "loss": 0.00027081690495833755, "memory(GiB)": 112.53, "reward": 1.5, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4769, "train_speed(iter/s)": 0.02717 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.0, "completions/mean_length": 103.92708587646484, "completions/min_length": 39.25, "epoch": 7.108711839166046, "grad_norm": 0.0029037454920731497, "kl": 0.280517578125, "learning_rate": 1.9613550796916161e-07, "loss": 0.00028037792071700096, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4770, "train_speed(iter/s)": 0.027171 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 206.5, "completions/mean_length": 94.76041793823242, "completions/min_length": 32.0, "epoch": 7.110201042442293, "grad_norm": 0.03146640494859949, "kl": 0.32080078125, "learning_rate": 1.9594773117751056e-07, "loss": 0.00032063128310255706, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4771, "train_speed(iter/s)": 0.027169 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.0, "completions/mean_length": 81.39583587646484, "completions/min_length": 34.0, "epoch": 7.111690245718541, "grad_norm": 0.0034424190414688986, "kl": 0.3134765625, "learning_rate": 1.9576002240799166e-07, "loss": 0.0003136446757707745, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4772, "train_speed(iter/s)": 0.027171 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.25, "completions/mean_length": 104.10416984558105, "completions/min_length": 42.0, "epoch": 7.113179448994788, "grad_norm": 0.0032373483800783756, "kl": 0.256591796875, "learning_rate": 1.955723817025995e-07, "loss": 0.0002569066418800503, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4773, "train_speed(iter/s)": 0.02717 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 235.0, "completions/mean_length": 101.82291984558105, "completions/min_length": 35.25, "epoch": 7.114668652271035, "grad_norm": 0.003603828134700787, "kl": 0.2890625, "learning_rate": 1.953848091033124e-07, "loss": 0.00028822728199884295, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4774, "train_speed(iter/s)": 0.027172 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.0, "completions/mean_length": 97.90625190734863, "completions/min_length": 30.75, "epoch": 7.116157855547282, "grad_norm": 0.003182504881796499, "kl": 0.2783203125, "learning_rate": 1.9519730465209384e-07, "loss": 0.0002781337534543127, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4775, "train_speed(iter/s)": 0.027172 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 236.5, "completions/mean_length": 104.85416984558105, "completions/min_length": 36.75, "epoch": 7.117647058823529, "grad_norm": 0.0034070134392119337, "kl": 0.284423828125, "learning_rate": 1.9500986839089252e-07, "loss": 0.0002842834801413119, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4776, "train_speed(iter/s)": 0.027174 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 244.25, "completions/mean_length": 91.80208587646484, "completions/min_length": 33.75, "epoch": 7.119136262099777, "grad_norm": 2.0074983053552766, "kl": 0.2978515625, "learning_rate": 1.948225003616406e-07, "loss": -0.0010718866251409054, "memory(GiB)": 112.53, "reward": 1.822916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4777, "train_speed(iter/s)": 0.027174 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.75, "completions/mean_length": 101.45833587646484, "completions/min_length": 37.5, "epoch": 7.1206254653760235, "grad_norm": 1.2935405803868854, "kl": 0.266357421875, "learning_rate": 1.9463520060625643e-07, "loss": -0.011739108711481094, "memory(GiB)": 112.53, "reward": 1.6875000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000074505806, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4778, "train_speed(iter/s)": 0.027173 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 232.75, "completions/mean_length": 102.62500190734863, "completions/min_length": 36.25, "epoch": 7.122114668652271, "grad_norm": 0.002920304605997712, "kl": 0.2734375, "learning_rate": 1.944479691666423e-07, "loss": 0.000273246259894222, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4779, "train_speed(iter/s)": 0.027173 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.0, "completions/mean_length": 88.72916984558105, "completions/min_length": 45.0, "epoch": 7.123603871928518, "grad_norm": 0.00334349815302343, "kl": 0.31298828125, "learning_rate": 1.942608060846852e-07, "loss": 0.0003128963289782405, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4780, "train_speed(iter/s)": 0.027175 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.0, "completions/mean_length": 105.60416984558105, "completions/min_length": 43.0, "epoch": 7.125093075204766, "grad_norm": 1.530534183548258, "kl": 0.31005859375, "learning_rate": 1.9407371140225714e-07, "loss": 0.005251697730273008, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4781, "train_speed(iter/s)": 0.027176 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 196.0, "completions/mean_length": 95.55208587646484, "completions/min_length": 24.25, "epoch": 7.1265822784810124, "grad_norm": 1.7467178122871099, "kl": 0.28271484375, "learning_rate": 1.9388668516121437e-07, "loss": -0.007536310702562332, "memory(GiB)": 112.53, "reward": 1.8541666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4782, "train_speed(iter/s)": 0.027177 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 202.75, "completions/mean_length": 90.05208587646484, "completions/min_length": 25.5, "epoch": 7.12807148175726, "grad_norm": 0.0028794959588323912, "kl": 0.284423828125, "learning_rate": 1.9369972740339858e-07, "loss": 0.00028376581030897796, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4783, "train_speed(iter/s)": 0.027176 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 179.25, "completions/mean_length": 89.11458587646484, "completions/min_length": 34.5, "epoch": 7.129560685033507, "grad_norm": 1.0245963368884121, "kl": 0.28759765625, "learning_rate": 1.9351283817063546e-07, "loss": 0.004653988406062126, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4784, "train_speed(iter/s)": 0.027177 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 233.75, "completions/mean_length": 94.08333396911621, "completions/min_length": 32.25, "epoch": 7.131049888309755, "grad_norm": 0.003712198480935754, "kl": 0.29052734375, "learning_rate": 1.933260175047356e-07, "loss": 0.0002904917928390205, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4785, "train_speed(iter/s)": 0.027179 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 223.25, "completions/mean_length": 91.09375381469727, "completions/min_length": 33.0, "epoch": 7.132539091586001, "grad_norm": 0.0040584862691581056, "kl": 0.3056640625, "learning_rate": 1.931392654474942e-07, "loss": 0.0003055329725611955, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4786, "train_speed(iter/s)": 0.027178 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 231.25, "completions/mean_length": 94.65625190734863, "completions/min_length": 39.0, "epoch": 7.134028294862249, "grad_norm": 0.0032836711467904038, "kl": 0.29638671875, "learning_rate": 1.9295258204069116e-07, "loss": 0.000296133803203702, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4787, "train_speed(iter/s)": 0.027179 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.25, "completions/mean_length": 104.57291984558105, "completions/min_length": 33.75, "epoch": 7.135517498138496, "grad_norm": 1.1555452882699508, "kl": 0.27392578125, "learning_rate": 1.9276596732609112e-07, "loss": -0.018578696995973587, "memory(GiB)": 112.53, "reward": 1.8958333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4788, "train_speed(iter/s)": 0.02718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 246.75, "completions/mean_length": 97.16666984558105, "completions/min_length": 32.0, "epoch": 7.137006701414743, "grad_norm": 0.0029404883073595885, "kl": 0.28173828125, "learning_rate": 1.9257942134544331e-07, "loss": 0.00028155912877991796, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4789, "train_speed(iter/s)": 0.027182 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.5, "completions/mean_length": 92.41666793823242, "completions/min_length": 36.5, "epoch": 7.13849590469099, "grad_norm": 0.003019130797422749, "kl": 0.29052734375, "learning_rate": 1.923929441404814e-07, "loss": 0.0002904683060478419, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4790, "train_speed(iter/s)": 0.027182 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 271.0, "completions/mean_length": 116.07291984558105, "completions/min_length": 40.75, "epoch": 7.139985107967237, "grad_norm": 0.003234265230832331, "kl": 0.262451171875, "learning_rate": 1.9220653575292378e-07, "loss": 0.0002620485029183328, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4791, "train_speed(iter/s)": 0.027179 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.5, "completions/mean_length": 97.35416984558105, "completions/min_length": 35.25, "epoch": 7.141474311243485, "grad_norm": 0.003146376505742414, "kl": 0.2841796875, "learning_rate": 1.9202019622447357e-07, "loss": 0.00028382608434185386, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4792, "train_speed(iter/s)": 0.027179 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.25, "completions/mean_length": 102.68750190734863, "completions/min_length": 42.5, "epoch": 7.142963514519732, "grad_norm": 1.096927303544761, "kl": 0.274658203125, "learning_rate": 1.918339255968181e-07, "loss": -0.005566427018493414, "memory(GiB)": 112.53, "reward": 1.7083333730697632, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7083333507180214, "rewards/CineAccuracyORM/std": 0.33594508469104767, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4793, "train_speed(iter/s)": 0.02718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 229.25, "completions/mean_length": 104.38541793823242, "completions/min_length": 41.25, "epoch": 7.144452717795979, "grad_norm": 0.003520464558365501, "kl": 0.2841796875, "learning_rate": 1.9164772391162998e-07, "loss": 0.00028374313842505217, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4794, "train_speed(iter/s)": 0.027178 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 241.0, "completions/mean_length": 102.33333587646484, "completions/min_length": 41.75, "epoch": 7.145941921072226, "grad_norm": 0.0040312194832415, "kl": 0.26708984375, "learning_rate": 1.9146159121056577e-07, "loss": 0.00026681332383304834, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4795, "train_speed(iter/s)": 0.027178 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 246.5, "completions/mean_length": 109.12500381469727, "completions/min_length": 40.5, "epoch": 7.147431124348474, "grad_norm": 0.0030004987198941316, "kl": 0.259765625, "learning_rate": 1.9127552753526683e-07, "loss": 0.0002597179845906794, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4796, "train_speed(iter/s)": 0.027178 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.5, "completions/mean_length": 93.94791793823242, "completions/min_length": 38.0, "epoch": 7.148920327624721, "grad_norm": 1.3588825877417514, "kl": 0.29736328125, "learning_rate": 1.910895329273591e-07, "loss": 0.004828361794352531, "memory(GiB)": 112.53, "reward": 1.760416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166865348816, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4797, "train_speed(iter/s)": 0.027179 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 233.0, "completions/mean_length": 102.8125, "completions/min_length": 29.5, "epoch": 7.150409530900968, "grad_norm": 0.0032255299466833493, "kl": 0.2705078125, "learning_rate": 1.9090360742845275e-07, "loss": 0.0002706379455048591, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4798, "train_speed(iter/s)": 0.02718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 263.5, "completions/mean_length": 97.09375190734863, "completions/min_length": 39.5, "epoch": 7.151898734177215, "grad_norm": 0.00292391967842844, "kl": 0.2734375, "learning_rate": 1.907177510801431e-07, "loss": 0.00027384591521695256, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4799, "train_speed(iter/s)": 0.027179 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.25, "completions/mean_length": 104.94791984558105, "completions/min_length": 33.5, "epoch": 7.153387937453463, "grad_norm": 0.003274513213218, "kl": 0.2607421875, "learning_rate": 1.905319639240096e-07, "loss": 0.00026130833430215716, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4800, "train_speed(iter/s)": 0.027179 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.25, "completions/mean_length": 101.43750190734863, "completions/min_length": 37.75, "epoch": 7.15487714072971, "grad_norm": 1.2456838373061794, "kl": 0.281494140625, "learning_rate": 1.9034624600161624e-07, "loss": -0.0020283968187868595, "memory(GiB)": 112.53, "reward": 1.822916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4801, "train_speed(iter/s)": 0.027181 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 250.0, "completions/mean_length": 96.04166793823242, "completions/min_length": 30.5, "epoch": 7.156366344005956, "grad_norm": 0.0031075482352028077, "kl": 0.29345703125, "learning_rate": 1.9016059735451156e-07, "loss": 0.0002929006004706025, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4802, "train_speed(iter/s)": 0.02718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 252.0, "completions/mean_length": 98.09375190734863, "completions/min_length": 34.5, "epoch": 7.157855547282204, "grad_norm": 0.002999402255328689, "kl": 0.26171875, "learning_rate": 1.8997501802422844e-07, "loss": 0.00026186235481873155, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4803, "train_speed(iter/s)": 0.02718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 249.5, "completions/mean_length": 104.04167175292969, "completions/min_length": 42.0, "epoch": 7.159344750558451, "grad_norm": 0.0028886788925250755, "kl": 0.250732421875, "learning_rate": 1.8978950805228488e-07, "loss": 0.000250589830102399, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4804, "train_speed(iter/s)": 0.02718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.25, "completions/mean_length": 98.21875381469727, "completions/min_length": 28.0, "epoch": 7.160833953834699, "grad_norm": 0.0030148655267132206, "kl": 0.2763671875, "learning_rate": 1.8960406748018225e-07, "loss": 0.0002759287308435887, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4805, "train_speed(iter/s)": 0.027179 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 294.0, "completions/mean_length": 110.09375381469727, "completions/min_length": 27.25, "epoch": 7.162323157110945, "grad_norm": 0.002918792692678081, "kl": 0.2734375, "learning_rate": 1.894186963494076e-07, "loss": 0.00027351989410817623, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4806, "train_speed(iter/s)": 0.027177 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 232.5, "completions/mean_length": 104.52083396911621, "completions/min_length": 39.5, "epoch": 7.163812360387193, "grad_norm": 0.003270772540459487, "kl": 0.248046875, "learning_rate": 1.8923339470143167e-07, "loss": 0.0002478339010849595, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4807, "train_speed(iter/s)": 0.027179 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 233.25, "completions/mean_length": 105.03125190734863, "completions/min_length": 41.25, "epoch": 7.16530156366344, "grad_norm": 0.003197925371152498, "kl": 0.2744140625, "learning_rate": 1.8904816257770973e-07, "loss": 0.0002745653910096735, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4808, "train_speed(iter/s)": 0.027179 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 234.75, "completions/mean_length": 95.10416793823242, "completions/min_length": 40.0, "epoch": 7.1667907669396875, "grad_norm": 0.0032932944577230964, "kl": 0.2802734375, "learning_rate": 1.888630000196822e-07, "loss": 0.0002804204123094678, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4809, "train_speed(iter/s)": 0.027179 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.0, "completions/mean_length": 93.68750381469727, "completions/min_length": 41.25, "epoch": 7.168279970215934, "grad_norm": 0.0033011747338628212, "kl": 0.279296875, "learning_rate": 1.8867790706877256e-07, "loss": 0.0002794383908621967, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4810, "train_speed(iter/s)": 0.027177 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 191.0, "completions/mean_length": 88.48958587646484, "completions/min_length": 43.25, "epoch": 7.169769173492182, "grad_norm": 0.003273576877133742, "kl": 0.29736328125, "learning_rate": 1.8849288376639016e-07, "loss": 0.0002972599468193948, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4811, "train_speed(iter/s)": 0.027177 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 249.25, "completions/mean_length": 101.39583587646484, "completions/min_length": 38.75, "epoch": 7.171258376768429, "grad_norm": 0.004517483541381147, "kl": 0.26611328125, "learning_rate": 1.8830793015392794e-07, "loss": 0.00026630854699760675, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4812, "train_speed(iter/s)": 0.027179 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 260.5, "completions/mean_length": 112.93750190734863, "completions/min_length": 45.75, "epoch": 7.1727475800446765, "grad_norm": 0.003224062486432395, "kl": 0.257080078125, "learning_rate": 1.8812304627276349e-07, "loss": 0.0002568440977483988, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4813, "train_speed(iter/s)": 0.027179 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.5, "completions/mean_length": 91.16666984558105, "completions/min_length": 39.75, "epoch": 7.174236783320923, "grad_norm": 0.0051190838417525105, "kl": 0.28564453125, "learning_rate": 1.879382321642587e-07, "loss": 0.00028565985849127173, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4814, "train_speed(iter/s)": 0.027179 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 247.5, "completions/mean_length": 106.47916984558105, "completions/min_length": 32.0, "epoch": 7.17572598659717, "grad_norm": 0.004285630765995876, "kl": 0.2392578125, "learning_rate": 1.8775348786975976e-07, "loss": 0.0002389173023402691, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4815, "train_speed(iter/s)": 0.027181 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 204.75, "completions/mean_length": 97.97916793823242, "completions/min_length": 32.25, "epoch": 7.177215189873418, "grad_norm": 2.214872439009231, "kl": 0.2734375, "learning_rate": 1.8756881343059776e-07, "loss": -0.010644545778632164, "memory(GiB)": 112.53, "reward": 1.9791666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.9791666716337204, "rewards/CineAccuracyORM/std": 0.07058246433734894, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4816, "train_speed(iter/s)": 0.027183 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 256.0, "completions/mean_length": 116.26041793823242, "completions/min_length": 38.75, "epoch": 7.178704393149665, "grad_norm": 0.02245730984509158, "kl": 0.26611328125, "learning_rate": 1.8738420888808765e-07, "loss": 0.00026646326296031475, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4817, "train_speed(iter/s)": 0.02718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 301.25, "completions/mean_length": 96.52083396911621, "completions/min_length": 32.75, "epoch": 7.180193596425912, "grad_norm": 0.014761988359021155, "kl": 0.2978515625, "learning_rate": 1.8719967428352884e-07, "loss": 0.00029831688152626157, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4818, "train_speed(iter/s)": 0.027181 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 233.0, "completions/mean_length": 106.48958778381348, "completions/min_length": 37.25, "epoch": 7.181682799702159, "grad_norm": 0.0032026034615892825, "kl": 0.26611328125, "learning_rate": 1.8701520965820522e-07, "loss": 0.00026647254708223045, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4819, "train_speed(iter/s)": 0.02718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 312.0, "completions/mean_length": 114.59375381469727, "completions/min_length": 40.0, "epoch": 7.183172002978407, "grad_norm": 0.002966908889801159, "kl": 0.252685546875, "learning_rate": 1.8683081505338465e-07, "loss": 0.0002530482306610793, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4820, "train_speed(iter/s)": 0.027181 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 202.0, "completions/mean_length": 89.60417175292969, "completions/min_length": 29.5, "epoch": 7.1846612062546535, "grad_norm": 0.003523571727930552, "kl": 0.29638671875, "learning_rate": 1.8664649051032006e-07, "loss": 0.0002962320577353239, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4821, "train_speed(iter/s)": 0.027183 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 272.5, "completions/mean_length": 115.38541984558105, "completions/min_length": 35.0, "epoch": 7.186150409530901, "grad_norm": 1.794557587809116, "kl": 0.271484375, "learning_rate": 1.8646223607024807e-07, "loss": 0.010738067328929901, "memory(GiB)": 112.53, "reward": 1.7291666865348816, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.7291666716337204, "rewards/CineAccuracyORM/std": 0.3284776881337166, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4822, "train_speed(iter/s)": 0.027181 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 280.25, "completions/mean_length": 102.52083778381348, "completions/min_length": 36.0, "epoch": 7.187639612807148, "grad_norm": 0.0029624828838352257, "kl": 0.2744140625, "learning_rate": 1.862780517743898e-07, "loss": 0.00027440598933026195, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4823, "train_speed(iter/s)": 0.027178 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.0, "completions/mean_length": 84.68750190734863, "completions/min_length": 29.75, "epoch": 7.189128816083396, "grad_norm": 0.004935611738781468, "kl": 0.32080078125, "learning_rate": 1.860939376639508e-07, "loss": 0.00032094816560857, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4824, "train_speed(iter/s)": 0.02718 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 186.5, "completions/mean_length": 95.77083587646484, "completions/min_length": 44.0, "epoch": 7.1906180193596425, "grad_norm": 0.0035507150167240964, "kl": 0.2763671875, "learning_rate": 1.8590989378012066e-07, "loss": 0.00027695007156580687, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4825, "train_speed(iter/s)": 0.027179 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 289.25, "completions/mean_length": 112.03125190734863, "completions/min_length": 39.5, "epoch": 7.19210722263589, "grad_norm": 0.004165439455395962, "kl": 0.2646484375, "learning_rate": 1.8572592016407336e-07, "loss": 0.00026462296955287457, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4826, "train_speed(iter/s)": 0.027176 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.0, "completions/mean_length": 106.71875190734863, "completions/min_length": 49.25, "epoch": 7.193596425912137, "grad_norm": 0.0034587296210140707, "kl": 0.2763671875, "learning_rate": 1.855420168569674e-07, "loss": 0.0002761118521448225, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4827, "train_speed(iter/s)": 0.027177 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 268.0, "completions/mean_length": 100.76041984558105, "completions/min_length": 36.75, "epoch": 7.195085629188384, "grad_norm": 0.003248024088296532, "kl": 0.29931640625, "learning_rate": 1.853581838999454e-07, "loss": 0.000298977829515934, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4828, "train_speed(iter/s)": 0.027173 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 308.5, "completions/mean_length": 106.32291984558105, "completions/min_length": 41.5, "epoch": 7.1965748324646315, "grad_norm": 0.0029758362557811666, "kl": 0.26513671875, "learning_rate": 1.8517442133413402e-07, "loss": 0.00026525859721004963, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4829, "train_speed(iter/s)": 0.027171 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.25, "completions/mean_length": 99.70833587646484, "completions/min_length": 33.0, "epoch": 7.198064035740878, "grad_norm": 0.9632267195271961, "kl": 0.35302734375, "learning_rate": 1.8499072920064446e-07, "loss": -0.006613486912101507, "memory(GiB)": 112.53, "reward": 1.8958333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4830, "train_speed(iter/s)": 0.02717 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.5, "completions/mean_length": 95.53125381469727, "completions/min_length": 40.5, "epoch": 7.199553239017126, "grad_norm": 0.003587146305900866, "kl": 0.27880859375, "learning_rate": 1.8480710754057183e-07, "loss": 0.0002787231933325529, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4831, "train_speed(iter/s)": 0.02717 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 206.0, "completions/mean_length": 103.02083587646484, "completions/min_length": 39.25, "epoch": 7.201042442293373, "grad_norm": 0.003236511420424616, "kl": 0.2685546875, "learning_rate": 1.846235563949961e-07, "loss": 0.00026849517598748207, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4832, "train_speed(iter/s)": 0.027169 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 259.75, "completions/mean_length": 100.35416984558105, "completions/min_length": 38.25, "epoch": 7.2025316455696204, "grad_norm": 0.0032115339231197143, "kl": 0.29150390625, "learning_rate": 1.8444007580498084e-07, "loss": 0.00029190522036515176, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4833, "train_speed(iter/s)": 0.027169 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 271.75, "completions/mean_length": 103.78125190734863, "completions/min_length": 34.25, "epoch": 7.204020848845867, "grad_norm": 0.3728222022568288, "kl": 0.5009765625, "learning_rate": 1.8425666581157407e-07, "loss": 0.0004994927439838648, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4834, "train_speed(iter/s)": 0.02717 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 225.0, "completions/mean_length": 104.34375190734863, "completions/min_length": 40.25, "epoch": 7.205510052122115, "grad_norm": 2.263834122234755, "kl": 0.280517578125, "learning_rate": 1.8407332645580804e-07, "loss": 0.02820717543363571, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.07715167850255966, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.1964849978685379, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4835, "train_speed(iter/s)": 0.027169 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 271.0, "completions/mean_length": 106.04166793823242, "completions/min_length": 36.75, "epoch": 7.206999255398362, "grad_norm": 0.014447483055324105, "kl": 0.2578125, "learning_rate": 1.8389005777869897e-07, "loss": 0.0002576212282292545, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4836, "train_speed(iter/s)": 0.027171 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.5, "completions/mean_length": 87.17708587646484, "completions/min_length": 39.25, "epoch": 7.208488458674609, "grad_norm": 1.1659968253313642, "kl": 0.274169921875, "learning_rate": 1.8370685982124795e-07, "loss": 0.010877705179154873, "memory(GiB)": 112.53, "reward": 1.84375, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.84375, "rewards/CineAccuracyORM/std": 0.12363383919000626, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4837, "train_speed(iter/s)": 0.027172 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 238.0, "completions/mean_length": 104.03125190734863, "completions/min_length": 36.75, "epoch": 7.209977661950856, "grad_norm": 0.00322375102683896, "kl": 0.28125, "learning_rate": 1.8352373262443915e-07, "loss": 0.00028136002947576344, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4838, "train_speed(iter/s)": 0.027173 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 264.75, "completions/mean_length": 112.25000190734863, "completions/min_length": 34.25, "epoch": 7.211466865227104, "grad_norm": 0.002905468300949698, "kl": 0.253173828125, "learning_rate": 1.83340676229242e-07, "loss": 0.0002534435479901731, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4839, "train_speed(iter/s)": 0.027174 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.75, "completions/mean_length": 106.67708587646484, "completions/min_length": 30.5, "epoch": 7.212956068503351, "grad_norm": 0.003186930832621817, "kl": 0.267822265625, "learning_rate": 1.831576906766094e-07, "loss": 0.0002673269482329488, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4840, "train_speed(iter/s)": 0.027171 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 241.5, "completions/mean_length": 110.57292175292969, "completions/min_length": 37.75, "epoch": 7.2144452717795975, "grad_norm": 0.0030449483983477493, "kl": 0.264404296875, "learning_rate": 1.8297477600747852e-07, "loss": 0.00026422514929436147, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4841, "train_speed(iter/s)": 0.027172 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 268.75, "completions/mean_length": 105.87500190734863, "completions/min_length": 39.5, "epoch": 7.215934475055845, "grad_norm": 0.0035913171464545582, "kl": 0.274169921875, "learning_rate": 1.8279193226277117e-07, "loss": 0.00027361366664990783, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4842, "train_speed(iter/s)": 0.027172 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.25, "completions/mean_length": 93.10416793823242, "completions/min_length": 31.25, "epoch": 7.217423678332092, "grad_norm": 0.0035519622063499485, "kl": 0.3173828125, "learning_rate": 1.826091594833923e-07, "loss": 0.0003175918245688081, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4843, "train_speed(iter/s)": 0.02717 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 231.25, "completions/mean_length": 108.02083396911621, "completions/min_length": 42.0, "epoch": 7.21891288160834, "grad_norm": 0.0029481597851675674, "kl": 0.25, "learning_rate": 1.8242645771023203e-07, "loss": 0.0002495810331311077, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4844, "train_speed(iter/s)": 0.02717 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 253.25, "completions/mean_length": 105.05208778381348, "completions/min_length": 42.75, "epoch": 7.2204020848845865, "grad_norm": 0.0033185794617602016, "kl": 0.255615234375, "learning_rate": 1.8224382698416396e-07, "loss": 0.00025597430067136884, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4845, "train_speed(iter/s)": 0.02717 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 288.5, "completions/mean_length": 98.11458587646484, "completions/min_length": 36.0, "epoch": 7.221891288160834, "grad_norm": 0.003005161703799739, "kl": 0.29833984375, "learning_rate": 1.8206126734604588e-07, "loss": 0.0002982160949613899, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4846, "train_speed(iter/s)": 0.02717 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 278.5, "completions/mean_length": 103.76042175292969, "completions/min_length": 32.75, "epoch": 7.223380491437081, "grad_norm": 0.003179299668018132, "kl": 0.282470703125, "learning_rate": 1.8187877883672021e-07, "loss": 0.0002822859096340835, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4847, "train_speed(iter/s)": 0.027169 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 243.25, "completions/mean_length": 96.83333778381348, "completions/min_length": 34.25, "epoch": 7.224869694713329, "grad_norm": 1.001194779301726, "kl": 0.3173828125, "learning_rate": 1.8169636149701228e-07, "loss": 0.013856202363967896, "memory(GiB)": 112.53, "reward": 1.7187500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500074505806, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4848, "train_speed(iter/s)": 0.027168 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.75, "completions/mean_length": 95.42708587646484, "completions/min_length": 36.75, "epoch": 7.226358897989575, "grad_norm": 0.003422385991575476, "kl": 0.255126953125, "learning_rate": 1.8151401536773308e-07, "loss": 0.0002548044722061604, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4849, "train_speed(iter/s)": 0.027169 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.25, "completions/mean_length": 95.30208587646484, "completions/min_length": 30.25, "epoch": 7.227848101265823, "grad_norm": 0.0037538540800886905, "kl": 0.3046875, "learning_rate": 1.8133174048967598e-07, "loss": 0.00030458930996246636, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4850, "train_speed(iter/s)": 0.027168 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.5, "completions/mean_length": 101.89583587646484, "completions/min_length": 46.0, "epoch": 7.22933730454207, "grad_norm": 0.06375118649552423, "kl": 0.3291015625, "learning_rate": 1.8114953690361985e-07, "loss": 0.000329213886288926, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4851, "train_speed(iter/s)": 0.02717 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 241.5, "completions/mean_length": 96.81250381469727, "completions/min_length": 27.25, "epoch": 7.230826507818318, "grad_norm": 0.004321358203409437, "kl": 0.28857421875, "learning_rate": 1.809674046503268e-07, "loss": 0.0002884386631194502, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4852, "train_speed(iter/s)": 0.027171 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 265.25, "completions/mean_length": 107.66666793823242, "completions/min_length": 42.75, "epoch": 7.232315711094564, "grad_norm": 0.4643407459866798, "kl": 0.3876953125, "learning_rate": 1.80785343770543e-07, "loss": 0.00038799489266239107, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4853, "train_speed(iter/s)": 0.027171 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.0, "completions/mean_length": 91.31250190734863, "completions/min_length": 35.75, "epoch": 7.233804914370811, "grad_norm": 0.0038231925634975, "kl": 0.29296875, "learning_rate": 1.806033543049994e-07, "loss": 0.000292481214273721, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4854, "train_speed(iter/s)": 0.027171 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 257.0, "completions/mean_length": 102.14583587646484, "completions/min_length": 37.0, "epoch": 7.235294117647059, "grad_norm": 2.5022590600879573, "kl": 0.296875, "learning_rate": 1.8042143629440974e-07, "loss": 0.00545761501416564, "memory(GiB)": 112.53, "reward": 1.7187500596046448, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.7187500149011612, "rewards/CineAccuracyORM/std": 0.43498801440000534, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4855, "train_speed(iter/s)": 0.027172 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 232.5, "completions/mean_length": 95.01041984558105, "completions/min_length": 31.5, "epoch": 7.236783320923306, "grad_norm": 0.002863506318832873, "kl": 0.3115234375, "learning_rate": 1.80239589779473e-07, "loss": 0.0003115962026640773, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4856, "train_speed(iter/s)": 0.027171 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 249.25, "completions/mean_length": 92.54166793823242, "completions/min_length": 30.75, "epoch": 7.238272524199553, "grad_norm": 0.0034041852020662744, "kl": 0.28173828125, "learning_rate": 1.800578148008714e-07, "loss": 0.0002819189103320241, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4857, "train_speed(iter/s)": 0.027171 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 229.75, "completions/mean_length": 94.35416984558105, "completions/min_length": 35.25, "epoch": 7.2397617274758, "grad_norm": 0.00352107218334986, "kl": 0.27978515625, "learning_rate": 1.7987611139927116e-07, "loss": 0.00027980381855741143, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4858, "train_speed(iter/s)": 0.02717 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.5, "completions/mean_length": 98.57291793823242, "completions/min_length": 31.75, "epoch": 7.241250930752048, "grad_norm": 0.789951237535456, "kl": 0.29052734375, "learning_rate": 1.796944796153233e-07, "loss": -0.0037606246769428253, "memory(GiB)": 112.53, "reward": 1.822916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4859, "train_speed(iter/s)": 0.027171 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 201.75, "completions/mean_length": 92.90625381469727, "completions/min_length": 34.75, "epoch": 7.242740134028295, "grad_norm": 0.0028777869164143027, "kl": 0.29248046875, "learning_rate": 1.7951291948966146e-07, "loss": 0.00029250181978568435, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4860, "train_speed(iter/s)": 0.02717 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 251.0, "completions/mean_length": 101.27083587646484, "completions/min_length": 34.25, "epoch": 7.244229337304542, "grad_norm": 0.003112889602798093, "kl": 0.28271484375, "learning_rate": 1.793314310629046e-07, "loss": 0.00028290037880651653, "memory(GiB)": 112.53, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4861, "train_speed(iter/s)": 0.02717 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 220.5, "completions/mean_length": 99.53125190734863, "completions/min_length": 39.75, "epoch": 7.245718540580789, "grad_norm": 0.0032541718158794627, "kl": 0.26953125, "learning_rate": 1.791500143756548e-07, "loss": 0.00026969180908054113, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4862, "train_speed(iter/s)": 0.027171 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.5, "completions/mean_length": 94.19791984558105, "completions/min_length": 40.5, "epoch": 7.247207743857037, "grad_norm": 0.003016850009828843, "kl": 0.2880859375, "learning_rate": 1.7896866946849838e-07, "loss": 0.0002877888036891818, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4863, "train_speed(iter/s)": 0.027172 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 234.75, "completions/mean_length": 112.37500190734863, "completions/min_length": 32.0, "epoch": 7.248696947133284, "grad_norm": 3.2675016711997014, "kl": 0.25634765625, "learning_rate": 1.787873963820054e-07, "loss": -0.00974223017692566, "memory(GiB)": 112.53, "reward": 1.84375, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.84375, "rewards/CineAccuracyORM/std": 0.23421530425548553, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4864, "train_speed(iter/s)": 0.027172 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 201.75, "completions/mean_length": 94.01041984558105, "completions/min_length": 35.25, "epoch": 7.250186150409531, "grad_norm": 1.1150732496950815, "kl": 0.35595703125, "learning_rate": 1.7860619515673032e-07, "loss": -0.010678949765861034, "memory(GiB)": 112.53, "reward": 1.84375, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.84375, "rewards/CineAccuracyORM/std": 0.12363383919000626, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4865, "train_speed(iter/s)": 0.027172 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.5, "completions/mean_length": 99.79166793823242, "completions/min_length": 26.0, "epoch": 7.251675353685778, "grad_norm": 0.003057416513521111, "kl": 0.28515625, "learning_rate": 1.7842506583321105e-07, "loss": 0.0002850885794032365, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4866, "train_speed(iter/s)": 0.027171 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.5, "completions/mean_length": 101.48958587646484, "completions/min_length": 41.0, "epoch": 7.253164556962025, "grad_norm": 0.002982594041146472, "kl": 0.254150390625, "learning_rate": 1.7824400845196963e-07, "loss": 0.0002545596507843584, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4867, "train_speed(iter/s)": 0.027172 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.5, "completions/mean_length": 93.52083396911621, "completions/min_length": 37.5, "epoch": 7.254653760238273, "grad_norm": 0.004288594870270584, "kl": 0.31640625, "learning_rate": 1.780630230535119e-07, "loss": 0.00031700971885584295, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4868, "train_speed(iter/s)": 0.027169 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.5, "completions/mean_length": 78.66666984558105, "completions/min_length": 27.5, "epoch": 7.256142963514519, "grad_norm": 0.003184909984341616, "kl": 0.341796875, "learning_rate": 1.7788210967832744e-07, "loss": 0.00034146299003623426, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4869, "train_speed(iter/s)": 0.02717 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 271.5, "completions/mean_length": 97.27083587646484, "completions/min_length": 29.25, "epoch": 7.257632166790767, "grad_norm": 0.0031251974488659966, "kl": 0.28662109375, "learning_rate": 1.7770126836689037e-07, "loss": 0.0002868969168048352, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4870, "train_speed(iter/s)": 0.027168 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.0, "completions/mean_length": 98.75000190734863, "completions/min_length": 44.0, "epoch": 7.259121370067014, "grad_norm": 0.004502216686915849, "kl": 0.29150390625, "learning_rate": 1.7752049915965806e-07, "loss": 0.0002914807992056012, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4871, "train_speed(iter/s)": 0.027167 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.75, "completions/mean_length": 95.28125190734863, "completions/min_length": 36.0, "epoch": 7.2606105733432615, "grad_norm": 0.003018566656731067, "kl": 0.272216796875, "learning_rate": 1.7733980209707182e-07, "loss": 0.00027216062881052494, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4872, "train_speed(iter/s)": 0.027167 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.0, "completions/mean_length": 98.81250190734863, "completions/min_length": 40.5, "epoch": 7.262099776619508, "grad_norm": 0.0033479239137014583, "kl": 0.277587890625, "learning_rate": 1.771591772195571e-07, "loss": 0.00027775426860898733, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4873, "train_speed(iter/s)": 0.027168 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.25, "completions/mean_length": 83.51041793823242, "completions/min_length": 34.75, "epoch": 7.263588979895756, "grad_norm": 0.00399452900348508, "kl": 0.3203125, "learning_rate": 1.7697862456752271e-07, "loss": 0.00032019891659729183, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4874, "train_speed(iter/s)": 0.027167 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.0, "completions/mean_length": 101.76041984558105, "completions/min_length": 36.25, "epoch": 7.265078183172003, "grad_norm": 1.201727872870292, "kl": 0.265625, "learning_rate": 1.7679814418136223e-07, "loss": -0.031637925654649734, "memory(GiB)": 112.53, "reward": 1.5937500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500102445483, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4875, "train_speed(iter/s)": 0.027168 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.25, "completions/mean_length": 87.96875190734863, "completions/min_length": 32.0, "epoch": 7.2665673864482505, "grad_norm": 0.0032005169781569366, "kl": 0.298828125, "learning_rate": 1.766177361014518e-07, "loss": 0.00029923717374913394, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4876, "train_speed(iter/s)": 0.027169 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 204.75, "completions/mean_length": 94.01042175292969, "completions/min_length": 39.0, "epoch": 7.268056589724497, "grad_norm": 0.0029956486650360784, "kl": 0.27197265625, "learning_rate": 1.7643740036815258e-07, "loss": 0.00027185381622985005, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4877, "train_speed(iter/s)": 0.027168 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.0, "completions/mean_length": 81.35416984558105, "completions/min_length": 26.25, "epoch": 7.269545793000745, "grad_norm": 0.0036213644579861566, "kl": 0.322265625, "learning_rate": 1.7625713702180872e-07, "loss": 0.0003230611328035593, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4878, "train_speed(iter/s)": 0.02717 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 247.5, "completions/mean_length": 107.07291984558105, "completions/min_length": 45.5, "epoch": 7.271034996276992, "grad_norm": 0.002660074821189485, "kl": 0.25927734375, "learning_rate": 1.7607694610274842e-07, "loss": 0.0002592888777144253, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4879, "train_speed(iter/s)": 0.027169 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 199.0, "completions/mean_length": 95.21875190734863, "completions/min_length": 34.0, "epoch": 7.272524199553239, "grad_norm": 0.004040154052411783, "kl": 0.27197265625, "learning_rate": 1.7589682765128423e-07, "loss": 0.0002721509663388133, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4880, "train_speed(iter/s)": 0.027169 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.25, "completions/mean_length": 98.51041984558105, "completions/min_length": 32.0, "epoch": 7.274013402829486, "grad_norm": 0.003145944738237152, "kl": 0.283203125, "learning_rate": 1.7571678170771126e-07, "loss": 0.00028361365548335016, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4881, "train_speed(iter/s)": 0.027169 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 229.75, "completions/mean_length": 86.12500190734863, "completions/min_length": 30.75, "epoch": 7.275502606105733, "grad_norm": 0.003140829748408601, "kl": 0.3154296875, "learning_rate": 1.7553680831230966e-07, "loss": 0.0003157245519105345, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4882, "train_speed(iter/s)": 0.027169 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 238.25, "completions/mean_length": 97.15625, "completions/min_length": 21.5, "epoch": 7.276991809381981, "grad_norm": 0.8043212454852535, "kl": 0.27392578125, "learning_rate": 1.7535690750534264e-07, "loss": -0.0023689675144851208, "memory(GiB)": 112.53, "reward": 1.822916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4883, "train_speed(iter/s)": 0.027168 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 214.5, "completions/mean_length": 97.18750381469727, "completions/min_length": 37.75, "epoch": 7.2784810126582276, "grad_norm": 0.934198338889482, "kl": 0.27099609375, "learning_rate": 1.751770793270572e-07, "loss": 0.028052031993865967, "memory(GiB)": 112.53, "reward": 1.822916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4884, "train_speed(iter/s)": 0.027168 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 279.25, "completions/mean_length": 101.67708587646484, "completions/min_length": 37.75, "epoch": 7.279970215934475, "grad_norm": 2.4386378630652272, "kl": 0.27685546875, "learning_rate": 1.7499732381768479e-07, "loss": 0.0021550317760556936, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4885, "train_speed(iter/s)": 0.027165 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.5, "completions/mean_length": 80.15625190734863, "completions/min_length": 41.0, "epoch": 7.281459419210722, "grad_norm": 1.5139779020836104, "kl": 0.306640625, "learning_rate": 1.7481764101743924e-07, "loss": 0.0009494466939941049, "memory(GiB)": 112.53, "reward": 1.7604166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4886, "train_speed(iter/s)": 0.027166 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.25, "completions/mean_length": 87.33333778381348, "completions/min_length": 32.25, "epoch": 7.28294862248697, "grad_norm": 0.003342817788520657, "kl": 0.28857421875, "learning_rate": 1.7463803096651974e-07, "loss": 0.00028865932836197317, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4887, "train_speed(iter/s)": 0.027165 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 229.0, "completions/mean_length": 96.84375190734863, "completions/min_length": 32.0, "epoch": 7.2844378257632165, "grad_norm": 0.002848184382768283, "kl": 0.2705078125, "learning_rate": 1.7445849370510763e-07, "loss": 0.00027056338149122894, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4888, "train_speed(iter/s)": 0.027166 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 248.25, "completions/mean_length": 93.53125190734863, "completions/min_length": 34.0, "epoch": 7.285927029039464, "grad_norm": 0.0030235505752413457, "kl": 0.28369140625, "learning_rate": 1.742790292733693e-07, "loss": 0.000283700879663229, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4889, "train_speed(iter/s)": 0.027166 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.0, "completions/mean_length": 92.95833778381348, "completions/min_length": 37.25, "epoch": 7.287416232315711, "grad_norm": 0.0031536705670569833, "kl": 0.27734375, "learning_rate": 1.7409963771145403e-07, "loss": 0.0002773294982034713, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4890, "train_speed(iter/s)": 0.027163 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 277.5, "completions/mean_length": 109.73958778381348, "completions/min_length": 34.75, "epoch": 7.288905435591959, "grad_norm": 0.5162273402994509, "kl": 0.266357421875, "learning_rate": 1.7392031905949488e-07, "loss": -0.02602533809840679, "memory(GiB)": 112.53, "reward": 1.6562500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500074505806, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4891, "train_speed(iter/s)": 0.027162 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 220.25, "completions/mean_length": 101.47916984558105, "completions/min_length": 39.75, "epoch": 7.2903946388682055, "grad_norm": 0.0033600842261265252, "kl": 0.26904296875, "learning_rate": 1.7374107335760934e-07, "loss": 0.0002689888933673501, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4892, "train_speed(iter/s)": 0.027159 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.75, "completions/mean_length": 89.31250381469727, "completions/min_length": 30.25, "epoch": 7.291883842144452, "grad_norm": 0.5743812150626344, "kl": 0.3154296875, "learning_rate": 1.7356190064589726e-07, "loss": 0.017923440784215927, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4893, "train_speed(iter/s)": 0.027158 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.0, "completions/mean_length": 81.96875190734863, "completions/min_length": 38.0, "epoch": 7.2933730454207, "grad_norm": 0.0032149131142606615, "kl": 0.3037109375, "learning_rate": 1.7338280096444341e-07, "loss": 0.00030319724464789033, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4894, "train_speed(iter/s)": 0.027159 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.0, "completions/mean_length": 89.42708778381348, "completions/min_length": 35.75, "epoch": 7.294862248696947, "grad_norm": 0.0032516991153350005, "kl": 0.306640625, "learning_rate": 1.7320377435331557e-07, "loss": 0.0003059451119042933, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4895, "train_speed(iter/s)": 0.027159 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 204.25, "completions/mean_length": 81.62500190734863, "completions/min_length": 35.25, "epoch": 7.2963514519731945, "grad_norm": 0.7484718851330412, "kl": 0.33056640625, "learning_rate": 1.7302482085256514e-07, "loss": -0.0043202610686421394, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4896, "train_speed(iter/s)": 0.027158 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 247.0, "completions/mean_length": 85.26041984558105, "completions/min_length": 34.0, "epoch": 7.297840655249441, "grad_norm": 1.0050973062420139, "kl": 0.322265625, "learning_rate": 1.7284594050222778e-07, "loss": -0.013189110904932022, "memory(GiB)": 112.53, "reward": 1.90625, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.12363383919000626, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4897, "train_speed(iter/s)": 0.027157 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.5, "completions/mean_length": 94.40625381469727, "completions/min_length": 33.5, "epoch": 7.299329858525689, "grad_norm": 0.0031824666557817866, "kl": 0.26904296875, "learning_rate": 1.7266713334232175e-07, "loss": 0.00026913589681498706, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4898, "train_speed(iter/s)": 0.027158 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.75, "completions/mean_length": 91.27083587646484, "completions/min_length": 29.5, "epoch": 7.300819061801936, "grad_norm": 0.003527484877143702, "kl": 0.30859375, "learning_rate": 1.7248839941285e-07, "loss": 0.0003084995551034808, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4899, "train_speed(iter/s)": 0.027158 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 234.75, "completions/mean_length": 90.61458396911621, "completions/min_length": 37.5, "epoch": 7.302308265078183, "grad_norm": 0.6411655276756151, "kl": 0.29052734375, "learning_rate": 1.7230973875379846e-07, "loss": -0.010933998972177505, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4900, "train_speed(iter/s)": 0.02716 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 241.25, "completions/mean_length": 97.27083396911621, "completions/min_length": 38.25, "epoch": 7.30379746835443, "grad_norm": 1.1650906604025308, "kl": 0.268798828125, "learning_rate": 1.7213115140513685e-07, "loss": -0.007997487671673298, "memory(GiB)": 112.53, "reward": 1.8125000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4901, "train_speed(iter/s)": 0.027157 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.5, "completions/mean_length": 84.42708396911621, "completions/min_length": 33.75, "epoch": 7.305286671630678, "grad_norm": 0.9044985767351073, "kl": 0.3056640625, "learning_rate": 1.7195263740681842e-07, "loss": 0.0049565318040549755, "memory(GiB)": 112.53, "reward": 1.822916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 0.9895833432674408, "rewards/Format/std": 0.05103103443980217, "step": 4902, "train_speed(iter/s)": 0.027159 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.25, "completions/mean_length": 87.21875190734863, "completions/min_length": 37.25, "epoch": 7.306775874906925, "grad_norm": 0.1294724819065716, "kl": 0.380859375, "learning_rate": 1.7177419679877993e-07, "loss": 0.000381260528229177, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4903, "train_speed(iter/s)": 0.027159 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.0, "completions/mean_length": 89.79166984558105, "completions/min_length": 36.75, "epoch": 7.308265078183172, "grad_norm": 1.5159181797848238, "kl": 0.2890625, "learning_rate": 1.715958296209422e-07, "loss": 0.03090563230216503, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.08625819534063339, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.35134297609329224, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4904, "train_speed(iter/s)": 0.027159 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.0, "completions/mean_length": 91.47916793823242, "completions/min_length": 36.25, "epoch": 7.309754281459419, "grad_norm": 1.8432334125752072, "kl": 0.350830078125, "learning_rate": 1.7141753591320913e-07, "loss": -0.016855809837579727, "memory(GiB)": 112.53, "reward": 1.4791666865348816, "reward_std": 0.08311938121914864, "rewards/CineAccuracyORM/mean": 0.4791666716337204, "rewards/CineAccuracyORM/std": 0.4790416583418846, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4905, "train_speed(iter/s)": 0.027157 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 260.0, "completions/mean_length": 97.02083587646484, "completions/min_length": 37.5, "epoch": 7.311243484735666, "grad_norm": 0.0031829864427797014, "kl": 0.283203125, "learning_rate": 1.7123931571546824e-07, "loss": 0.00028339895652607083, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4906, "train_speed(iter/s)": 0.027156 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.75, "completions/mean_length": 91.58333587646484, "completions/min_length": 37.25, "epoch": 7.312732688011914, "grad_norm": 0.0034903582805942894, "kl": 0.302978515625, "learning_rate": 1.7106116906759077e-07, "loss": 0.0003030907246284187, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4907, "train_speed(iter/s)": 0.027156 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.5, "completions/mean_length": 91.84375381469727, "completions/min_length": 35.75, "epoch": 7.3142218912881605, "grad_norm": 0.0029419587761699375, "kl": 0.28564453125, "learning_rate": 1.708830960094313e-07, "loss": 0.0002852053730748594, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4908, "train_speed(iter/s)": 0.027157 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 234.25, "completions/mean_length": 90.71875381469727, "completions/min_length": 34.5, "epoch": 7.315711094564408, "grad_norm": 1.4461309177198314, "kl": 0.31689453125, "learning_rate": 1.7070509658082828e-07, "loss": 0.0012584147043526173, "memory(GiB)": 112.53, "reward": 1.635416716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.635416679084301, "rewards/CineAccuracyORM/std": 0.4456155598163605, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4909, "train_speed(iter/s)": 0.027157 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 223.75, "completions/mean_length": 92.60416793823242, "completions/min_length": 36.75, "epoch": 7.317200297840655, "grad_norm": 0.0038873641436329076, "kl": 0.3193359375, "learning_rate": 1.7052717082160344e-07, "loss": 0.00031901017064228654, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4910, "train_speed(iter/s)": 0.027159 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.75, "completions/mean_length": 86.08333587646484, "completions/min_length": 33.5, "epoch": 7.318689501116903, "grad_norm": 1.1054258799193322, "kl": 0.30615234375, "learning_rate": 1.70349318771562e-07, "loss": -0.0044771237298846245, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4911, "train_speed(iter/s)": 0.027157 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 252.5, "completions/mean_length": 99.14583587646484, "completions/min_length": 37.25, "epoch": 7.320178704393149, "grad_norm": 0.4332658288199274, "kl": 0.2861328125, "learning_rate": 1.701715404704928e-07, "loss": 0.0163901224732399, "memory(GiB)": 112.53, "reward": 1.8437500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4912, "train_speed(iter/s)": 0.027158 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.0, "completions/mean_length": 91.15625190734863, "completions/min_length": 30.75, "epoch": 7.321667907669397, "grad_norm": 2.3819368551121065, "kl": 0.28857421875, "learning_rate": 1.6999383595816813e-07, "loss": 0.0009029433131217957, "memory(GiB)": 112.53, "reward": 1.9583333432674408, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.9583333432674408, "rewards/CineAccuracyORM/std": 0.09517337381839752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4913, "train_speed(iter/s)": 0.027159 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 199.5, "completions/mean_length": 89.26041793823242, "completions/min_length": 37.75, "epoch": 7.323157110945644, "grad_norm": 1.3975983226819424, "kl": 0.27099609375, "learning_rate": 1.698162052743436e-07, "loss": -0.011877465061843395, "memory(GiB)": 112.53, "reward": 1.8125000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4914, "train_speed(iter/s)": 0.027161 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.75, "completions/mean_length": 89.55208587646484, "completions/min_length": 40.25, "epoch": 7.324646314221892, "grad_norm": 0.003291082983763111, "kl": 0.30419921875, "learning_rate": 1.6963864845875886e-07, "loss": 0.0003035162517335266, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4915, "train_speed(iter/s)": 0.027161 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.5, "completions/mean_length": 91.95833587646484, "completions/min_length": 32.5, "epoch": 7.326135517498138, "grad_norm": 0.003113305968756877, "kl": 0.287109375, "learning_rate": 1.694611655511365e-07, "loss": 0.00028730809572152793, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4916, "train_speed(iter/s)": 0.02716 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.0, "completions/mean_length": 93.25000381469727, "completions/min_length": 42.0, "epoch": 7.327624720774386, "grad_norm": 0.0033426464099727544, "kl": 0.29345703125, "learning_rate": 1.692837565911825e-07, "loss": 0.0002928461180999875, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4917, "train_speed(iter/s)": 0.027161 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 257.75, "completions/mean_length": 85.0625, "completions/min_length": 30.0, "epoch": 7.329113924050633, "grad_norm": 1.3390710174810156, "kl": 0.31005859375, "learning_rate": 1.6910642161858713e-07, "loss": -0.009100595489144325, "memory(GiB)": 112.53, "reward": 1.572916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5729166865348816, "rewards/CineAccuracyORM/std": 0.4121886007487774, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4918, "train_speed(iter/s)": 0.027162 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.25, "completions/mean_length": 88.76041984558105, "completions/min_length": 32.75, "epoch": 7.33060312732688, "grad_norm": 0.0035216666690798585, "kl": 0.2822265625, "learning_rate": 1.6892916067302277e-07, "loss": 0.0002824482799042016, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4919, "train_speed(iter/s)": 0.027164 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 231.25, "completions/mean_length": 106.52083587646484, "completions/min_length": 37.0, "epoch": 7.332092330603127, "grad_norm": 0.04558966389337999, "kl": 0.267578125, "learning_rate": 1.6875197379414658e-07, "loss": 0.00026785029331222177, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4920, "train_speed(iter/s)": 0.027162 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 220.5, "completions/mean_length": 90.77083587646484, "completions/min_length": 28.25, "epoch": 7.333581533879374, "grad_norm": 0.8300978464164485, "kl": 0.302734375, "learning_rate": 1.6857486102159824e-07, "loss": 0.022783558815717697, "memory(GiB)": 112.53, "reward": 1.7916666865348816, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7916666679084301, "rewards/CineAccuracyORM/std": 0.09517337381839752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4921, "train_speed(iter/s)": 0.027164 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.0, "completions/mean_length": 80.53125381469727, "completions/min_length": 33.25, "epoch": 7.335070737155622, "grad_norm": 0.003581286621915504, "kl": 0.314453125, "learning_rate": 1.6839782239500112e-07, "loss": 0.0003145358641631901, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4922, "train_speed(iter/s)": 0.027164 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.25, "completions/mean_length": 94.28125381469727, "completions/min_length": 31.5, "epoch": 7.336559940431869, "grad_norm": 0.043069577911659516, "kl": 0.30859375, "learning_rate": 1.6822085795396246e-07, "loss": 0.00030822769622318447, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4923, "train_speed(iter/s)": 0.027165 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 248.25, "completions/mean_length": 93.36458587646484, "completions/min_length": 40.5, "epoch": 7.338049143708116, "grad_norm": 0.003492623928204995, "kl": 0.30322265625, "learning_rate": 1.6804396773807188e-07, "loss": 0.0003026703489013016, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4924, "train_speed(iter/s)": 0.027165 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 186.75, "completions/mean_length": 84.33333587646484, "completions/min_length": 33.0, "epoch": 7.339538346984363, "grad_norm": 0.0032510195959352087, "kl": 0.3037109375, "learning_rate": 1.678671517869037e-07, "loss": 0.0003032023087143898, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4925, "train_speed(iter/s)": 0.027164 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 248.75, "completions/mean_length": 100.71875381469727, "completions/min_length": 38.5, "epoch": 7.341027550260611, "grad_norm": 0.003644231230487573, "kl": 0.291015625, "learning_rate": 1.6769041014001417e-07, "loss": 0.0002910356561187655, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4926, "train_speed(iter/s)": 0.027162 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 199.75, "completions/mean_length": 81.20833778381348, "completions/min_length": 32.25, "epoch": 7.342516753536858, "grad_norm": 0.0034516718930877347, "kl": 0.3134765625, "learning_rate": 1.675137428369442e-07, "loss": 0.0003130544500891119, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4927, "train_speed(iter/s)": 0.027161 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 244.0, "completions/mean_length": 103.57291984558105, "completions/min_length": 36.5, "epoch": 7.344005956813105, "grad_norm": 0.0031316683397381306, "kl": 0.26611328125, "learning_rate": 1.6733714991721738e-07, "loss": 0.000266232353169471, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4928, "train_speed(iter/s)": 0.02716 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.75, "completions/mean_length": 91.05208587646484, "completions/min_length": 41.0, "epoch": 7.345495160089352, "grad_norm": 0.003773613596328034, "kl": 0.287109375, "learning_rate": 1.671606314203407e-07, "loss": 0.0002871777687687427, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4929, "train_speed(iter/s)": 0.027162 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.0, "completions/mean_length": 83.53125190734863, "completions/min_length": 40.5, "epoch": 7.3469843633656, "grad_norm": 0.0036782500727534097, "kl": 0.302734375, "learning_rate": 1.669841873858051e-07, "loss": 0.0003029026265721768, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4930, "train_speed(iter/s)": 0.027164 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 265.5, "completions/mean_length": 92.47916793823242, "completions/min_length": 35.25, "epoch": 7.348473566641847, "grad_norm": 0.0033740950357110835, "kl": 0.28173828125, "learning_rate": 1.6680781785308367e-07, "loss": 0.0002818610519170761, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4931, "train_speed(iter/s)": 0.027164 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.25, "completions/mean_length": 91.47916793823242, "completions/min_length": 37.5, "epoch": 7.349962769918093, "grad_norm": 1.7012999444272945, "kl": 0.287109375, "learning_rate": 1.6663152286163412e-07, "loss": 0.005641379859298468, "memory(GiB)": 112.53, "reward": 1.7187500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500074505806, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4932, "train_speed(iter/s)": 0.027161 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.5, "completions/mean_length": 90.70833587646484, "completions/min_length": 34.25, "epoch": 7.351451973194341, "grad_norm": 0.0034079404535316067, "kl": 0.307373046875, "learning_rate": 1.6645530245089668e-07, "loss": 0.00030711994622834027, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4933, "train_speed(iter/s)": 0.02716 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.5, "completions/mean_length": 97.52083396911621, "completions/min_length": 30.5, "epoch": 7.352941176470588, "grad_norm": 1.102684532228729, "kl": 0.30322265625, "learning_rate": 1.6627915666029502e-07, "loss": -0.04120030999183655, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.06803862750530243, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4934, "train_speed(iter/s)": 0.027159 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.0, "completions/mean_length": 88.13541889190674, "completions/min_length": 31.0, "epoch": 7.3544303797468356, "grad_norm": 0.0031326061310332247, "kl": 0.2880859375, "learning_rate": 1.6610308552923662e-07, "loss": 0.00028787669725716114, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4935, "train_speed(iter/s)": 0.027161 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 199.0, "completions/mean_length": 87.57291984558105, "completions/min_length": 44.0, "epoch": 7.355919583023082, "grad_norm": 0.0035075147972704495, "kl": 0.287109375, "learning_rate": 1.6592708909711127e-07, "loss": 0.0002874779747799039, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4936, "train_speed(iter/s)": 0.027159 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 223.25, "completions/mean_length": 86.97916984558105, "completions/min_length": 34.0, "epoch": 7.35740878629933, "grad_norm": 0.003783078662913332, "kl": 0.30859375, "learning_rate": 1.6575116740329315e-07, "loss": 0.00030834239441901445, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4937, "train_speed(iter/s)": 0.027159 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 191.0, "completions/mean_length": 83.85416793823242, "completions/min_length": 37.0, "epoch": 7.358897989575577, "grad_norm": 1.9139178822165135, "kl": 0.3173828125, "learning_rate": 1.6557532048713902e-07, "loss": -0.009351348504424095, "memory(GiB)": 112.53, "reward": 1.8437500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4938, "train_speed(iter/s)": 0.02716 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.25, "completions/mean_length": 88.82291984558105, "completions/min_length": 36.5, "epoch": 7.3603871928518245, "grad_norm": 1.4012688781025056, "kl": 0.2724609375, "learning_rate": 1.653995483879891e-07, "loss": -0.007983390241861343, "memory(GiB)": 112.53, "reward": 1.7083333730697632, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7083333432674408, "rewards/CineAccuracyORM/std": 0.36846019327640533, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4939, "train_speed(iter/s)": 0.027161 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.25, "completions/mean_length": 82.89583587646484, "completions/min_length": 30.5, "epoch": 7.361876396128071, "grad_norm": 1.1918562548911973, "kl": 0.3134765625, "learning_rate": 1.652238511451668e-07, "loss": -0.011686990037560463, "memory(GiB)": 112.53, "reward": 1.6875000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000149011612, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4940, "train_speed(iter/s)": 0.027163 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.25, "completions/mean_length": 85.79166984558105, "completions/min_length": 38.75, "epoch": 7.363365599404319, "grad_norm": 0.0030927542001417006, "kl": 0.26904296875, "learning_rate": 1.650482287979788e-07, "loss": 0.000269686192041263, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4941, "train_speed(iter/s)": 0.027164 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.75, "completions/mean_length": 79.70833587646484, "completions/min_length": 43.25, "epoch": 7.364854802680566, "grad_norm": 0.0033952235709266273, "kl": 0.31298828125, "learning_rate": 1.648726813857153e-07, "loss": 0.0003123542701359838, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4942, "train_speed(iter/s)": 0.027162 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.25, "completions/mean_length": 92.97916984558105, "completions/min_length": 35.0, "epoch": 7.3663440059568135, "grad_norm": 0.003305739079905286, "kl": 0.267578125, "learning_rate": 1.6469720894764945e-07, "loss": 0.00026757351588457823, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4943, "train_speed(iter/s)": 0.027162 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.75, "completions/mean_length": 86.18750381469727, "completions/min_length": 40.0, "epoch": 7.36783320923306, "grad_norm": 0.0032760497157591577, "kl": 0.288818359375, "learning_rate": 1.6452181152303763e-07, "loss": 0.00028905493672937155, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4944, "train_speed(iter/s)": 0.027162 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 220.5, "completions/mean_length": 86.81250381469727, "completions/min_length": 42.0, "epoch": 7.369322412509307, "grad_norm": 0.0035034734291862412, "kl": 0.302734375, "learning_rate": 1.6434648915111947e-07, "loss": 0.0003030065563507378, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4945, "train_speed(iter/s)": 0.027163 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 248.0, "completions/mean_length": 96.44791984558105, "completions/min_length": 40.75, "epoch": 7.370811615785555, "grad_norm": 0.003251730737830455, "kl": 0.29052734375, "learning_rate": 1.6417124187111774e-07, "loss": 0.0002898508682847023, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4946, "train_speed(iter/s)": 0.027163 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.5, "completions/mean_length": 84.83333587646484, "completions/min_length": 37.0, "epoch": 7.372300819061802, "grad_norm": 1.0454038165137371, "kl": 0.3193359375, "learning_rate": 1.6399606972223878e-07, "loss": -0.004443354904651642, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4947, "train_speed(iter/s)": 0.027165 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 160.5, "completions/mean_length": 77.61458396911621, "completions/min_length": 40.0, "epoch": 7.373790022338049, "grad_norm": 0.003808900086284945, "kl": 0.30419921875, "learning_rate": 1.6382097274367174e-07, "loss": 0.0003039971925318241, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4948, "train_speed(iter/s)": 0.027167 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.0, "completions/mean_length": 83.15625190734863, "completions/min_length": 43.75, "epoch": 7.375279225614296, "grad_norm": 0.0038096814389037867, "kl": 0.322265625, "learning_rate": 1.63645950974589e-07, "loss": 0.00032212547375820577, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4949, "train_speed(iter/s)": 0.027166 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 247.5, "completions/mean_length": 101.39583587646484, "completions/min_length": 41.75, "epoch": 7.376768428890544, "grad_norm": 1.617224870124521, "kl": 0.2685546875, "learning_rate": 1.6347100445414625e-07, "loss": 0.008844468742609024, "memory(GiB)": 112.53, "reward": 1.8958333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4950, "train_speed(iter/s)": 0.027167 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.5, "completions/mean_length": 87.22916793823242, "completions/min_length": 40.5, "epoch": 7.3782576321667905, "grad_norm": 2.5251323895865947, "kl": 0.297119140625, "learning_rate": 1.6329613322148216e-07, "loss": 0.0020135450176894665, "memory(GiB)": 112.53, "reward": 1.8854166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.20484384894371033, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4951, "train_speed(iter/s)": 0.027166 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.75, "completions/mean_length": 91.59375190734863, "completions/min_length": 33.75, "epoch": 7.379746835443038, "grad_norm": 0.003199984316686343, "kl": 0.29345703125, "learning_rate": 1.6312133731571864e-07, "loss": 0.0002927995228674263, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4952, "train_speed(iter/s)": 0.027164 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.0, "completions/mean_length": 87.69791793823242, "completions/min_length": 45.5, "epoch": 7.381236038719285, "grad_norm": 1.2297999312730952, "kl": 0.82470703125, "learning_rate": 1.62946616775961e-07, "loss": -0.004763968288898468, "memory(GiB)": 112.53, "reward": 1.7395834028720856, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.739583358168602, "rewards/CineAccuracyORM/std": 0.4121886007487774, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4953, "train_speed(iter/s)": 0.027164 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 285.0, "completions/mean_length": 99.10416984558105, "completions/min_length": 40.5, "epoch": 7.382725241995533, "grad_norm": 0.003491285060948859, "kl": 0.30322265625, "learning_rate": 1.6277197164129735e-07, "loss": 0.0003025586192961782, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4954, "train_speed(iter/s)": 0.027163 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 208.0, "completions/mean_length": 88.96875381469727, "completions/min_length": 43.75, "epoch": 7.3842144452717795, "grad_norm": 0.003352272459559209, "kl": 0.302734375, "learning_rate": 1.6259740195079902e-07, "loss": 0.00030368746956810355, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4955, "train_speed(iter/s)": 0.027163 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 231.25, "completions/mean_length": 90.55208587646484, "completions/min_length": 37.0, "epoch": 7.385703648548027, "grad_norm": 0.004202737282563723, "kl": 0.29736328125, "learning_rate": 1.6242290774352052e-07, "loss": 0.00029732423718087375, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4956, "train_speed(iter/s)": 0.027163 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.75, "completions/mean_length": 92.77083587646484, "completions/min_length": 44.5, "epoch": 7.387192851824274, "grad_norm": 0.8014668800575158, "kl": 0.28369140625, "learning_rate": 1.622484890584993e-07, "loss": -0.006272091995924711, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.822916679084301, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4957, "train_speed(iter/s)": 0.027165 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 195.0, "completions/mean_length": 85.58333396911621, "completions/min_length": 32.25, "epoch": 7.388682055100521, "grad_norm": 1.2810788778142899, "kl": 0.282958984375, "learning_rate": 1.6207414593475633e-07, "loss": 0.007065880578011274, "memory(GiB)": 112.53, "reward": 1.9270833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9270833432674408, "rewards/CineAccuracyORM/std": 0.11607640981674194, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4958, "train_speed(iter/s)": 0.027165 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 195.5, "completions/mean_length": 82.16666984558105, "completions/min_length": 38.75, "epoch": 7.3901712583767685, "grad_norm": 3.4361881956269285, "kl": 0.76025390625, "learning_rate": 1.618998784112952e-07, "loss": -0.025333596393465996, "memory(GiB)": 112.53, "reward": 1.8541666865348816, "reward_std": 0.10346910171210766, "rewards/CineAccuracyORM/mean": 0.854166679084301, "rewards/CineAccuracyORM/std": 0.17827537283301353, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4959, "train_speed(iter/s)": 0.027166 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.25, "completions/mean_length": 85.84375190734863, "completions/min_length": 39.0, "epoch": 7.391660461653015, "grad_norm": 0.004345133219478069, "kl": 0.30517578125, "learning_rate": 1.6172568652710289e-07, "loss": 0.00030517380218952894, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4960, "train_speed(iter/s)": 0.027166 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 201.0, "completions/mean_length": 84.50000190734863, "completions/min_length": 37.75, "epoch": 7.393149664929263, "grad_norm": 0.8639659566584503, "kl": 0.3134765625, "learning_rate": 1.6155157032114925e-07, "loss": 0.015058069489896297, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4961, "train_speed(iter/s)": 0.027168 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 246.75, "completions/mean_length": 94.50000381469727, "completions/min_length": 43.0, "epoch": 7.39463886820551, "grad_norm": 2.469718633340167, "kl": 0.293212890625, "learning_rate": 1.6137752983238723e-07, "loss": 0.00011987867765128613, "memory(GiB)": 112.53, "reward": 1.7916666865348816, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.24807433784008026, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4962, "train_speed(iter/s)": 0.027167 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 214.5, "completions/mean_length": 84.02083587646484, "completions/min_length": 31.0, "epoch": 7.396128071481757, "grad_norm": 0.003940126285505065, "kl": 0.29931640625, "learning_rate": 1.6120356509975335e-07, "loss": 0.0002989588538184762, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4963, "train_speed(iter/s)": 0.027168 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 243.25, "completions/mean_length": 83.97916793823242, "completions/min_length": 39.0, "epoch": 7.397617274758004, "grad_norm": 2.5515738690949497, "kl": 0.322265625, "learning_rate": 1.6102967616216617e-07, "loss": -0.0015346326399594545, "memory(GiB)": 112.53, "reward": 1.729166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7291666865348816, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4964, "train_speed(iter/s)": 0.027167 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 244.75, "completions/mean_length": 92.61458587646484, "completions/min_length": 40.0, "epoch": 7.399106478034252, "grad_norm": 0.003984046990059566, "kl": 0.27783203125, "learning_rate": 1.6085586305852828e-07, "loss": 0.0002772793814074248, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4965, "train_speed(iter/s)": 0.027167 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 144.5, "completions/mean_length": 70.77083396911621, "completions/min_length": 31.75, "epoch": 7.400595681310499, "grad_norm": 0.00460549464720868, "kl": 0.322265625, "learning_rate": 1.606821258277247e-07, "loss": 0.0003227303677704185, "memory(GiB)": 112.53, "reward": 1.416666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.416666679084301, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4966, "train_speed(iter/s)": 0.027168 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.5, "completions/mean_length": 84.55208492279053, "completions/min_length": 36.0, "epoch": 7.402084884586746, "grad_norm": 0.0030901153216309294, "kl": 0.3193359375, "learning_rate": 1.6050846450862366e-07, "loss": 0.00031945592490956187, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4967, "train_speed(iter/s)": 0.027166 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.0, "completions/mean_length": 84.85416984558105, "completions/min_length": 40.5, "epoch": 7.403574087862993, "grad_norm": 0.0038556870008025525, "kl": 0.30419921875, "learning_rate": 1.6033487914007677e-07, "loss": 0.00030446445452980697, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4968, "train_speed(iter/s)": 0.027166 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 195.25, "completions/mean_length": 83.42708587646484, "completions/min_length": 32.25, "epoch": 7.405063291139241, "grad_norm": 0.003958404859934633, "kl": 0.294921875, "learning_rate": 1.6016136976091766e-07, "loss": 0.0002944519801530987, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4969, "train_speed(iter/s)": 0.027167 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 243.0, "completions/mean_length": 87.63541984558105, "completions/min_length": 39.75, "epoch": 7.406552494415488, "grad_norm": 2.1943901938758916, "kl": 0.31494140625, "learning_rate": 1.5998793640996415e-07, "loss": 0.01402708888053894, "memory(GiB)": 112.53, "reward": 1.697916716337204, "reward_std": 0.06803862750530243, "rewards/CineAccuracyORM/mean": 0.697916679084301, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4970, "train_speed(iter/s)": 0.027168 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.0, "completions/mean_length": 87.32291793823242, "completions/min_length": 40.75, "epoch": 7.4080416976917345, "grad_norm": 0.003396121001545161, "kl": 0.28173828125, "learning_rate": 1.598145791260163e-07, "loss": 0.000281535554677248, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4971, "train_speed(iter/s)": 0.027168 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 201.0, "completions/mean_length": 88.71875190734863, "completions/min_length": 43.0, "epoch": 7.409530900967982, "grad_norm": 1.2150406780078074, "kl": 0.299072265625, "learning_rate": 1.596412979478572e-07, "loss": -0.018624197691679, "memory(GiB)": 112.53, "reward": 1.8645833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8645833432674408, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4972, "train_speed(iter/s)": 0.027167 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.5, "completions/mean_length": 79.87500190734863, "completions/min_length": 42.75, "epoch": 7.411020104244229, "grad_norm": 1.7861771688878814, "kl": 0.3291015625, "learning_rate": 1.594680929142535e-07, "loss": 0.0003287321305833757, "memory(GiB)": 112.53, "reward": 1.7916666865348816, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.24807433784008026, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4973, "train_speed(iter/s)": 0.027167 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.0, "completions/mean_length": 91.59375381469727, "completions/min_length": 43.75, "epoch": 7.412509307520477, "grad_norm": 0.0038644900662127327, "kl": 0.281494140625, "learning_rate": 1.5929496406395382e-07, "loss": 0.0002816548221744597, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4974, "train_speed(iter/s)": 0.027169 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.5, "completions/mean_length": 79.97916984558105, "completions/min_length": 36.0, "epoch": 7.4139985107967235, "grad_norm": 0.00365926435299342, "kl": 0.3330078125, "learning_rate": 1.591219114356907e-07, "loss": 0.0003331785264890641, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4975, "train_speed(iter/s)": 0.027169 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.5, "completions/mean_length": 90.56250190734863, "completions/min_length": 36.0, "epoch": 7.415487714072971, "grad_norm": 0.003566297011605711, "kl": 0.30029296875, "learning_rate": 1.589489350681791e-07, "loss": 0.00030020647682249546, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4976, "train_speed(iter/s)": 0.027168 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 191.0, "completions/mean_length": 86.90625190734863, "completions/min_length": 41.5, "epoch": 7.416976917349218, "grad_norm": 0.003898317802542614, "kl": 0.30126953125, "learning_rate": 1.5877603500011698e-07, "loss": 0.000300831685308367, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4977, "train_speed(iter/s)": 0.027168 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.25, "completions/mean_length": 90.59375381469727, "completions/min_length": 38.0, "epoch": 7.418466120625466, "grad_norm": 0.0037519420360917232, "kl": 0.29052734375, "learning_rate": 1.5860321127018538e-07, "loss": 0.00029024769901297987, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4978, "train_speed(iter/s)": 0.027169 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 196.75, "completions/mean_length": 80.72916984558105, "completions/min_length": 33.0, "epoch": 7.419955323901712, "grad_norm": 0.0038257608508221906, "kl": 0.3193359375, "learning_rate": 1.58430463917048e-07, "loss": 0.0003192923904862255, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4979, "train_speed(iter/s)": 0.027171 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.0, "completions/mean_length": 84.90625190734863, "completions/min_length": 30.75, "epoch": 7.42144452717796, "grad_norm": 0.7578878566996357, "kl": 0.41015625, "learning_rate": 1.5825779297935193e-07, "loss": -0.01710670441389084, "memory(GiB)": 112.53, "reward": 1.6458334028720856, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6458333507180214, "rewards/CineAccuracyORM/std": 0.43174003064632416, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4980, "train_speed(iter/s)": 0.027169 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.0, "completions/mean_length": 79.92708587646484, "completions/min_length": 27.75, "epoch": 7.422933730454207, "grad_norm": 0.003310831873154444, "kl": 0.32080078125, "learning_rate": 1.5808519849572665e-07, "loss": 0.0003203594242222607, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4981, "train_speed(iter/s)": 0.027167 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 194.75, "completions/mean_length": 88.92708778381348, "completions/min_length": 33.0, "epoch": 7.424422933730455, "grad_norm": 1.2507420263590256, "kl": 0.28955078125, "learning_rate": 1.5791268050478483e-07, "loss": 0.0041749849915504456, "memory(GiB)": 112.53, "reward": 1.822916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4982, "train_speed(iter/s)": 0.027169 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.75, "completions/mean_length": 89.57291984558105, "completions/min_length": 35.75, "epoch": 7.425912137006701, "grad_norm": 0.003450909737643805, "kl": 0.31201171875, "learning_rate": 1.577402390451219e-07, "loss": 0.00031172827584668994, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4983, "train_speed(iter/s)": 0.027165 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.5, "completions/mean_length": 91.96875190734863, "completions/min_length": 44.0, "epoch": 7.427401340282948, "grad_norm": 0.0037393000738736868, "kl": 0.28125, "learning_rate": 1.5756787415531608e-07, "loss": 0.00028121151262894273, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4984, "train_speed(iter/s)": 0.027164 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.5, "completions/mean_length": 89.85416793823242, "completions/min_length": 41.25, "epoch": 7.428890543559196, "grad_norm": 0.0038110345272101393, "kl": 0.29931640625, "learning_rate": 1.5739558587392888e-07, "loss": 0.00029914750484749675, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4985, "train_speed(iter/s)": 0.027166 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.0, "completions/mean_length": 90.85416984558105, "completions/min_length": 40.25, "epoch": 7.430379746835443, "grad_norm": 0.043490592635327285, "kl": 0.30029296875, "learning_rate": 1.5722337423950427e-07, "loss": 0.00029984081629663706, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4986, "train_speed(iter/s)": 0.027165 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 252.5, "completions/mean_length": 90.35416793823242, "completions/min_length": 43.5, "epoch": 7.43186895011169, "grad_norm": 0.0036451298599940694, "kl": 0.2890625, "learning_rate": 1.5705123929056913e-07, "loss": 0.0002884402056224644, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4987, "train_speed(iter/s)": 0.027166 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 185.0, "completions/mean_length": 85.42708587646484, "completions/min_length": 30.75, "epoch": 7.433358153387937, "grad_norm": 1.3190415756666194, "kl": 0.3076171875, "learning_rate": 1.5687918106563325e-07, "loss": 0.013577910140156746, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833507180214, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4988, "train_speed(iter/s)": 0.027167 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.25, "completions/mean_length": 93.47916984558105, "completions/min_length": 45.5, "epoch": 7.434847356664185, "grad_norm": 0.0036765098917085066, "kl": 0.28759765625, "learning_rate": 1.5670719960318935e-07, "loss": 0.00028819695580750704, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4989, "train_speed(iter/s)": 0.027165 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.25, "completions/mean_length": 94.40625381469727, "completions/min_length": 41.25, "epoch": 7.436336559940432, "grad_norm": 0.0035232065158249852, "kl": 0.302734375, "learning_rate": 1.5653529494171252e-07, "loss": 0.0003025196201633662, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4990, "train_speed(iter/s)": 0.027165 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 251.75, "completions/mean_length": 90.17708587646484, "completions/min_length": 35.25, "epoch": 7.437825763216679, "grad_norm": 0.5208026484228834, "kl": 0.29736328125, "learning_rate": 1.5636346711966152e-07, "loss": 0.0015989344101399183, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.822916679084301, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4991, "train_speed(iter/s)": 0.027164 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.5, "completions/mean_length": 95.55208587646484, "completions/min_length": 40.0, "epoch": 7.439314966492926, "grad_norm": 0.004578624257337989, "kl": 0.245361328125, "learning_rate": 1.5619171617547717e-07, "loss": 0.0002450466272421181, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4992, "train_speed(iter/s)": 0.027166 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.75, "completions/mean_length": 97.46875381469727, "completions/min_length": 36.75, "epoch": 7.440804169769174, "grad_norm": 1.4030281789317403, "kl": 0.27783203125, "learning_rate": 1.5602004214758335e-07, "loss": 0.004525023978203535, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4993, "train_speed(iter/s)": 0.027167 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.5, "completions/mean_length": 87.75000190734863, "completions/min_length": 37.75, "epoch": 7.442293373045421, "grad_norm": 0.0036824690570420224, "kl": 0.3017578125, "learning_rate": 1.5584844507438676e-07, "loss": 0.0003015642869286239, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4994, "train_speed(iter/s)": 0.027165 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.0, "completions/mean_length": 89.38541984558105, "completions/min_length": 38.0, "epoch": 7.443782576321668, "grad_norm": 0.004060276574480308, "kl": 0.2958984375, "learning_rate": 1.556769249942767e-07, "loss": 0.0002953898801933974, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4995, "train_speed(iter/s)": 0.027167 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 252.25, "completions/mean_length": 89.41666984558105, "completions/min_length": 35.0, "epoch": 7.445271779597915, "grad_norm": 0.0034329848843345674, "kl": 0.29541015625, "learning_rate": 1.5550548194562564e-07, "loss": 0.0002953889488708228, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4996, "train_speed(iter/s)": 0.027166 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.5, "completions/mean_length": 87.88541984558105, "completions/min_length": 41.75, "epoch": 7.446760982874162, "grad_norm": 0.003992818820452678, "kl": 0.306640625, "learning_rate": 1.5533411596678842e-07, "loss": 0.00030653245630674064, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4997, "train_speed(iter/s)": 0.027168 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.5, "completions/mean_length": 81.63541793823242, "completions/min_length": 37.75, "epoch": 7.44825018615041, "grad_norm": 0.003697359578714874, "kl": 0.3037109375, "learning_rate": 1.5516282709610285e-07, "loss": 0.00030338732176460326, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4998, "train_speed(iter/s)": 0.027169 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.75, "completions/mean_length": 87.15625190734863, "completions/min_length": 42.75, "epoch": 7.449739389426656, "grad_norm": 0.003503850407698262, "kl": 0.294921875, "learning_rate": 1.5499161537188944e-07, "loss": 0.00029566153534688056, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 4999, "train_speed(iter/s)": 0.02717 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.75, "completions/mean_length": 90.64583778381348, "completions/min_length": 38.5, "epoch": 7.451228592702904, "grad_norm": 0.0034443586286544425, "kl": 0.29150390625, "learning_rate": 1.5482048083245114e-07, "loss": 0.00029146127053536475, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5000, "train_speed(iter/s)": 0.027166 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 255.0, "completions/mean_length": 94.83333587646484, "completions/min_length": 40.5, "epoch": 7.452717795979151, "grad_norm": 0.0033568975531147894, "kl": 0.2841796875, "learning_rate": 1.546494235160745e-07, "loss": 0.0002843272523023188, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5001, "train_speed(iter/s)": 0.027156 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 201.25, "completions/mean_length": 87.71875381469727, "completions/min_length": 44.75, "epoch": 7.4542069992553985, "grad_norm": 1.278200427021593, "kl": 0.29541015625, "learning_rate": 1.5447844346102763e-07, "loss": 0.018245957791805267, "memory(GiB)": 112.53, "reward": 1.7187500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500149011612, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5002, "train_speed(iter/s)": 0.027155 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.25, "completions/mean_length": 89.16666984558105, "completions/min_length": 35.25, "epoch": 7.455696202531645, "grad_norm": 0.003664844187925678, "kl": 0.27490234375, "learning_rate": 1.5430754070556228e-07, "loss": 0.000274797435849905, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5003, "train_speed(iter/s)": 0.027157 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.5, "completions/mean_length": 82.01041984558105, "completions/min_length": 32.25, "epoch": 7.457185405807893, "grad_norm": 0.0034193845534184393, "kl": 0.29150390625, "learning_rate": 1.5413671528791256e-07, "loss": 0.000291438220301643, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5004, "train_speed(iter/s)": 0.027156 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 202.25, "completions/mean_length": 84.00000381469727, "completions/min_length": 38.25, "epoch": 7.45867460908414, "grad_norm": 0.003786388824280176, "kl": 0.31201171875, "learning_rate": 1.5396596724629495e-07, "loss": 0.00031192132155410945, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5005, "train_speed(iter/s)": 0.027156 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.25, "completions/mean_length": 80.52083396911621, "completions/min_length": 36.75, "epoch": 7.4601638123603875, "grad_norm": 0.003993047765194798, "kl": 0.30615234375, "learning_rate": 1.5379529661890955e-07, "loss": 0.00030626144143752754, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5006, "train_speed(iter/s)": 0.027157 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.75, "completions/mean_length": 92.17708396911621, "completions/min_length": 36.25, "epoch": 7.461653015636634, "grad_norm": 0.0037263554521554773, "kl": 0.298828125, "learning_rate": 1.5362470344393792e-07, "loss": 0.00029874040046706796, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5007, "train_speed(iter/s)": 0.027155 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.75, "completions/mean_length": 91.70833587646484, "completions/min_length": 35.25, "epoch": 7.463142218912882, "grad_norm": 0.003728422643289115, "kl": 0.30224609375, "learning_rate": 1.5345418775954533e-07, "loss": 0.0003024482575710863, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5008, "train_speed(iter/s)": 0.027153 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.5, "completions/mean_length": 92.63541793823242, "completions/min_length": 41.0, "epoch": 7.464631422189129, "grad_norm": 0.0038780071731268468, "kl": 0.302734375, "learning_rate": 1.532837496038792e-07, "loss": 0.0003024632460437715, "memory(GiB)": 112.53, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5009, "train_speed(iter/s)": 0.027153 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 158.0, "completions/mean_length": 78.32291889190674, "completions/min_length": 36.75, "epoch": 7.466120625465376, "grad_norm": 1.9909296535472378, "kl": 0.29833984375, "learning_rate": 1.531133890150695e-07, "loss": -0.007902906276285648, "memory(GiB)": 112.53, "reward": 1.6875000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000149011612, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5010, "train_speed(iter/s)": 0.027154 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.5, "completions/mean_length": 86.38541984558105, "completions/min_length": 36.5, "epoch": 7.467609828741623, "grad_norm": 1.2168821846006272, "kl": 0.30419921875, "learning_rate": 1.529431060312295e-07, "loss": 0.011301273480057716, "memory(GiB)": 112.53, "reward": 1.8854166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.12724433839321136, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5011, "train_speed(iter/s)": 0.027154 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.25, "completions/mean_length": 83.96875190734863, "completions/min_length": 43.75, "epoch": 7.46909903201787, "grad_norm": 0.003591262067567917, "kl": 0.296875, "learning_rate": 1.527729006904541e-07, "loss": 0.00029696052661165595, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5012, "train_speed(iter/s)": 0.027154 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.5, "completions/mean_length": 87.94791793823242, "completions/min_length": 38.75, "epoch": 7.470588235294118, "grad_norm": 1.9025154603267749, "kl": 0.3095703125, "learning_rate": 1.5260277303082202e-07, "loss": -0.02904626540839672, "memory(GiB)": 112.53, "reward": 1.760416716337204, "reward_std": 0.06803862750530243, "rewards/CineAccuracyORM/mean": 0.7604166865348816, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5013, "train_speed(iter/s)": 0.027154 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 214.0, "completions/mean_length": 91.26041984558105, "completions/min_length": 43.25, "epoch": 7.4720774385703645, "grad_norm": 0.003676862622046727, "kl": 0.2783203125, "learning_rate": 1.5243272309039334e-07, "loss": 0.00027773663168773055, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5014, "train_speed(iter/s)": 0.027155 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 223.25, "completions/mean_length": 88.87500190734863, "completions/min_length": 44.0, "epoch": 7.473566641846612, "grad_norm": 0.06977518708387251, "kl": 0.2861328125, "learning_rate": 1.522627509072118e-07, "loss": 0.0002866433351300657, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5015, "train_speed(iter/s)": 0.027157 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.75, "completions/mean_length": 92.78125190734863, "completions/min_length": 41.25, "epoch": 7.475055845122859, "grad_norm": 0.003620815704730971, "kl": 0.29345703125, "learning_rate": 1.5209285651930327e-07, "loss": 0.0002934914082288742, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5016, "train_speed(iter/s)": 0.027157 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.25, "completions/mean_length": 84.59375381469727, "completions/min_length": 41.75, "epoch": 7.476545048399107, "grad_norm": 0.004096058774648782, "kl": 0.3125, "learning_rate": 1.5192303996467604e-07, "loss": 0.00031249038875102997, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5017, "train_speed(iter/s)": 0.027158 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.75, "completions/mean_length": 85.40625381469727, "completions/min_length": 38.75, "epoch": 7.4780342516753535, "grad_norm": 0.004106462865015424, "kl": 0.3056640625, "learning_rate": 1.517533012813217e-07, "loss": 0.0003059270093217492, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5018, "train_speed(iter/s)": 0.027158 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.75, "completions/mean_length": 79.31250190734863, "completions/min_length": 39.75, "epoch": 7.479523454951601, "grad_norm": 0.0035871297754726697, "kl": 0.3212890625, "learning_rate": 1.5158364050721334e-07, "loss": 0.0003206860856153071, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5019, "train_speed(iter/s)": 0.027156 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.75, "completions/mean_length": 83.48958587646484, "completions/min_length": 38.25, "epoch": 7.481012658227848, "grad_norm": 1.1267197837206069, "kl": 0.3046875, "learning_rate": 1.5141405768030767e-07, "loss": 0.006073374301195145, "memory(GiB)": 112.53, "reward": 1.84375, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.84375, "rewards/CineAccuracyORM/std": 0.12363383919000626, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5020, "train_speed(iter/s)": 0.027156 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.75, "completions/mean_length": 87.78125190734863, "completions/min_length": 37.0, "epoch": 7.482501861504096, "grad_norm": 0.003683364351068875, "kl": 0.31298828125, "learning_rate": 1.5124455283854337e-07, "loss": 0.00031326510361395776, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5021, "train_speed(iter/s)": 0.027154 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.75, "completions/mean_length": 94.18750190734863, "completions/min_length": 45.25, "epoch": 7.4839910647803425, "grad_norm": 0.0041585667326330625, "kl": 0.267578125, "learning_rate": 1.5107512601984167e-07, "loss": 0.0002677954616956413, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5022, "train_speed(iter/s)": 0.027155 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.5, "completions/mean_length": 87.21875381469727, "completions/min_length": 42.0, "epoch": 7.485480268056589, "grad_norm": 1.8214893597957824, "kl": 0.318359375, "learning_rate": 1.509057772621068e-07, "loss": 0.0007704924792051315, "memory(GiB)": 112.53, "reward": 1.8541666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5023, "train_speed(iter/s)": 0.027153 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.75, "completions/mean_length": 80.09375190734863, "completions/min_length": 34.5, "epoch": 7.486969471332837, "grad_norm": 1.8666555882663083, "kl": 0.31591796875, "learning_rate": 1.5073650660322506e-07, "loss": 0.021308597177267075, "memory(GiB)": 112.53, "reward": 1.7812500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7812500149011612, "rewards/CineAccuracyORM/std": 0.34448449313640594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5024, "train_speed(iter/s)": 0.027153 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.5, "completions/mean_length": 92.18750381469727, "completions/min_length": 41.25, "epoch": 7.488458674609084, "grad_norm": 0.0034809977409881046, "kl": 0.3017578125, "learning_rate": 1.505673140810655e-07, "loss": 0.00030199496541172266, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5025, "train_speed(iter/s)": 0.027153 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 194.25, "completions/mean_length": 91.28125190734863, "completions/min_length": 38.5, "epoch": 7.4899478778853315, "grad_norm": 0.004049365161585908, "kl": 0.275390625, "learning_rate": 1.5039819973347945e-07, "loss": 0.0002754111192189157, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5026, "train_speed(iter/s)": 0.027152 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 311.25, "completions/mean_length": 94.11458778381348, "completions/min_length": 37.0, "epoch": 7.491437081161578, "grad_norm": 0.0032368557808748555, "kl": 0.27880859375, "learning_rate": 1.502291635983011e-07, "loss": 0.0002784689422696829, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5027, "train_speed(iter/s)": 0.027152 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 259.75, "completions/mean_length": 101.06250190734863, "completions/min_length": 41.0, "epoch": 7.492926284437826, "grad_norm": 0.0033034970097289822, "kl": 0.27685546875, "learning_rate": 1.5006020571334675e-07, "loss": 0.00027662247885018587, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5028, "train_speed(iter/s)": 0.02715 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.0, "completions/mean_length": 83.22916793823242, "completions/min_length": 32.25, "epoch": 7.494415487714073, "grad_norm": 0.003754007388396906, "kl": 0.32861328125, "learning_rate": 1.4989132611641575e-07, "loss": 0.0003277427749708295, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5029, "train_speed(iter/s)": 0.027146 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 191.25, "completions/mean_length": 80.39583587646484, "completions/min_length": 34.5, "epoch": 7.49590469099032, "grad_norm": 0.003998271639541666, "kl": 0.322265625, "learning_rate": 1.4972252484528935e-07, "loss": 0.0003224962274543941, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5030, "train_speed(iter/s)": 0.027148 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.5, "completions/mean_length": 87.53125381469727, "completions/min_length": 41.0, "epoch": 7.497393894266567, "grad_norm": 0.0034970516396366606, "kl": 0.272705078125, "learning_rate": 1.4955380193773166e-07, "loss": 0.0002730457345023751, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5031, "train_speed(iter/s)": 0.027148 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 206.0, "completions/mean_length": 84.34375190734863, "completions/min_length": 37.25, "epoch": 7.498883097542815, "grad_norm": 1.5170858697774217, "kl": 0.28662109375, "learning_rate": 1.4938515743148906e-07, "loss": -0.00252345809713006, "memory(GiB)": 112.53, "reward": 1.8645833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8645833432674408, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5032, "train_speed(iter/s)": 0.027149 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.5, "completions/mean_length": 87.87500190734863, "completions/min_length": 42.25, "epoch": 7.500372300819062, "grad_norm": 1.4761238645844212, "kl": 0.289794921875, "learning_rate": 1.4921659136429021e-07, "loss": 0.011926702223718166, "memory(GiB)": 112.53, "reward": 1.822916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5033, "train_speed(iter/s)": 0.02715 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 161.75, "completions/mean_length": 86.29166984558105, "completions/min_length": 41.25, "epoch": 7.501861504095309, "grad_norm": 0.012813642556328783, "kl": 0.3115234375, "learning_rate": 1.4904810377384697e-07, "loss": 0.0003113717830274254, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5034, "train_speed(iter/s)": 0.027151 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.5, "completions/mean_length": 86.64583587646484, "completions/min_length": 36.75, "epoch": 7.503350707371556, "grad_norm": 0.0035294917084990047, "kl": 0.29296875, "learning_rate": 1.4887969469785284e-07, "loss": 0.00029284623451530933, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5035, "train_speed(iter/s)": 0.027152 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.75, "completions/mean_length": 96.84375190734863, "completions/min_length": 42.5, "epoch": 7.504839910647803, "grad_norm": 0.0033881927945832004, "kl": 0.27783203125, "learning_rate": 1.4871136417398405e-07, "loss": 0.00027751215384341776, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5036, "train_speed(iter/s)": 0.027153 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.0, "completions/mean_length": 87.48958778381348, "completions/min_length": 35.0, "epoch": 7.506329113924051, "grad_norm": 0.003678890413669714, "kl": 0.3056640625, "learning_rate": 1.4854311223989934e-07, "loss": 0.0003052699030376971, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5037, "train_speed(iter/s)": 0.027153 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.75, "completions/mean_length": 82.72916984558105, "completions/min_length": 28.5, "epoch": 7.5078183172002975, "grad_norm": 0.004122961185150102, "kl": 0.3095703125, "learning_rate": 1.483749389332396e-07, "loss": 0.0003088295634370297, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5038, "train_speed(iter/s)": 0.027154 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.0, "completions/mean_length": 86.20833587646484, "completions/min_length": 40.5, "epoch": 7.509307520476545, "grad_norm": 0.0037617390364209123, "kl": 0.28271484375, "learning_rate": 1.4820684429162877e-07, "loss": 0.00028229577583260834, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5039, "train_speed(iter/s)": 0.027154 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 201.75, "completions/mean_length": 86.63541984558105, "completions/min_length": 40.75, "epoch": 7.510796723752792, "grad_norm": 0.0039004164470465225, "kl": 0.3173828125, "learning_rate": 1.4803882835267212e-07, "loss": 0.00031745305750519037, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5040, "train_speed(iter/s)": 0.027153 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 202.25, "completions/mean_length": 87.02083587646484, "completions/min_length": 36.75, "epoch": 7.51228592702904, "grad_norm": 0.0036053572175274847, "kl": 0.31787109375, "learning_rate": 1.4787089115395844e-07, "loss": 0.00031784651218913496, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5041, "train_speed(iter/s)": 0.027152 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.75, "completions/mean_length": 80.48958587646484, "completions/min_length": 34.25, "epoch": 7.513775130305286, "grad_norm": 0.003835645464443429, "kl": 0.30078125, "learning_rate": 1.4770303273305817e-07, "loss": 0.0003008584026247263, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5042, "train_speed(iter/s)": 0.027154 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 236.25, "completions/mean_length": 84.27083587646484, "completions/min_length": 29.25, "epoch": 7.515264333581534, "grad_norm": 0.004914479006102424, "kl": 0.31640625, "learning_rate": 1.4753525312752425e-07, "loss": 0.0003166206297464669, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5043, "train_speed(iter/s)": 0.027156 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 184.5, "completions/mean_length": 83.45833587646484, "completions/min_length": 30.0, "epoch": 7.516753536857781, "grad_norm": 0.003896124381012814, "kl": 0.29248046875, "learning_rate": 1.4736755237489256e-07, "loss": 0.0002926648885477334, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5044, "train_speed(iter/s)": 0.027156 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 262.25, "completions/mean_length": 91.45833587646484, "completions/min_length": 31.75, "epoch": 7.518242740134029, "grad_norm": 0.009115587563719177, "kl": 0.298828125, "learning_rate": 1.471999305126802e-07, "loss": 0.00029859045753255486, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5045, "train_speed(iter/s)": 0.027155 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 220.0, "completions/mean_length": 87.40625, "completions/min_length": 39.5, "epoch": 7.519731943410275, "grad_norm": 1.313357159870853, "kl": 0.298828125, "learning_rate": 1.470323875783878e-07, "loss": 0.003062313888221979, "memory(GiB)": 112.53, "reward": 1.8854166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.20484384894371033, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5046, "train_speed(iter/s)": 0.027154 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.75, "completions/mean_length": 86.58333587646484, "completions/min_length": 39.25, "epoch": 7.521221146686523, "grad_norm": 0.0038065535975478374, "kl": 0.30712890625, "learning_rate": 1.4686492360949764e-07, "loss": 0.0003072629333473742, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5047, "train_speed(iter/s)": 0.027156 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.25, "completions/mean_length": 89.73958778381348, "completions/min_length": 40.0, "epoch": 7.52271034996277, "grad_norm": 0.0032180183328250297, "kl": 0.28759765625, "learning_rate": 1.4669753864347439e-07, "loss": 0.00028794363606721163, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5048, "train_speed(iter/s)": 0.027153 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 223.25, "completions/mean_length": 78.72916984558105, "completions/min_length": 35.25, "epoch": 7.524199553239017, "grad_norm": 0.00419679206411593, "kl": 0.33154296875, "learning_rate": 1.465302327177656e-07, "loss": 0.00033149024238809943, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5049, "train_speed(iter/s)": 0.027151 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.0, "completions/mean_length": 84.50000381469727, "completions/min_length": 37.0, "epoch": 7.525688756515264, "grad_norm": 0.003570223522014358, "kl": 0.270263671875, "learning_rate": 1.463630058698001e-07, "loss": 0.00027055980172008276, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5050, "train_speed(iter/s)": 0.027153 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.25, "completions/mean_length": 90.76041793823242, "completions/min_length": 41.0, "epoch": 7.527177959791511, "grad_norm": 0.003557385887763233, "kl": 0.29541015625, "learning_rate": 1.461958581369903e-07, "loss": 0.00029535050271078944, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5051, "train_speed(iter/s)": 0.027151 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 256.0, "completions/mean_length": 84.97916984558105, "completions/min_length": 39.25, "epoch": 7.528667163067759, "grad_norm": 0.004027913566467131, "kl": 0.30859375, "learning_rate": 1.460287895567296e-07, "loss": 0.0003087869263254106, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5052, "train_speed(iter/s)": 0.027152 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 262.0, "completions/mean_length": 99.86458778381348, "completions/min_length": 39.5, "epoch": 7.530156366344006, "grad_norm": 2.040118445080737, "kl": 0.29345703125, "learning_rate": 1.4586180016639475e-07, "loss": 0.031609196215867996, "memory(GiB)": 112.53, "reward": 1.8854166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.20484384894371033, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5053, "train_speed(iter/s)": 0.027152 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 202.25, "completions/mean_length": 83.95833587646484, "completions/min_length": 38.5, "epoch": 7.531645569620253, "grad_norm": 0.00356529539744405, "kl": 0.30126953125, "learning_rate": 1.4569489000334433e-07, "loss": 0.00030143820913508534, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5054, "train_speed(iter/s)": 0.027153 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.0, "completions/mean_length": 90.27083396911621, "completions/min_length": 31.25, "epoch": 7.5331347728965, "grad_norm": 0.4769468594926718, "kl": 0.2958984375, "learning_rate": 1.45528059104919e-07, "loss": -0.024460360407829285, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5055, "train_speed(iter/s)": 0.027154 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 237.5, "completions/mean_length": 89.79167175292969, "completions/min_length": 39.5, "epoch": 7.534623976172748, "grad_norm": 1.5817740585017597, "kl": 0.27197265625, "learning_rate": 1.4536130750844238e-07, "loss": 0.011178030632436275, "memory(GiB)": 112.53, "reward": 1.7708333432674408, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333358168602, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5056, "train_speed(iter/s)": 0.027154 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.0, "completions/mean_length": 89.28125190734863, "completions/min_length": 38.0, "epoch": 7.536113179448995, "grad_norm": 0.010604327534227066, "kl": 0.288330078125, "learning_rate": 1.4519463525121933e-07, "loss": 0.0002882579865399748, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5057, "train_speed(iter/s)": 0.027151 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.0, "completions/mean_length": 85.75000381469727, "completions/min_length": 32.0, "epoch": 7.537602382725242, "grad_norm": 0.003713190963395588, "kl": 0.3046875, "learning_rate": 1.4502804237053802e-07, "loss": 0.0003044230688828975, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5058, "train_speed(iter/s)": 0.027153 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 231.25, "completions/mean_length": 98.14583396911621, "completions/min_length": 40.0, "epoch": 7.539091586001489, "grad_norm": 0.003275370006988263, "kl": 0.27392578125, "learning_rate": 1.4486152890366808e-07, "loss": 0.000274195073870942, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5059, "train_speed(iter/s)": 0.027151 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 244.25, "completions/mean_length": 94.73958587646484, "completions/min_length": 33.0, "epoch": 7.540580789277737, "grad_norm": 0.0032938743922784797, "kl": 0.28564453125, "learning_rate": 1.4469509488786164e-07, "loss": 0.00028537295293062925, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5060, "train_speed(iter/s)": 0.02715 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 195.75, "completions/mean_length": 87.00000381469727, "completions/min_length": 40.25, "epoch": 7.542069992553984, "grad_norm": 0.8737844493119439, "kl": 0.32177734375, "learning_rate": 1.445287403603535e-07, "loss": 0.004435595124959946, "memory(GiB)": 112.53, "reward": 1.729166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7291666865348816, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5061, "train_speed(iter/s)": 0.02715 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.25, "completions/mean_length": 85.23958587646484, "completions/min_length": 34.25, "epoch": 7.54355919583023, "grad_norm": 0.003726006192147191, "kl": 0.30322265625, "learning_rate": 1.443624653583596e-07, "loss": 0.0003029178478755057, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5062, "train_speed(iter/s)": 0.027149 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 201.5, "completions/mean_length": 95.27083587646484, "completions/min_length": 37.5, "epoch": 7.545048399106478, "grad_norm": 0.6743363008087758, "kl": 0.28955078125, "learning_rate": 1.4419626991907924e-07, "loss": -0.0024561043828725815, "memory(GiB)": 112.53, "reward": 1.7604166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5063, "train_speed(iter/s)": 0.027147 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 214.0, "completions/mean_length": 96.97916984558105, "completions/min_length": 37.75, "epoch": 7.546537602382725, "grad_norm": 0.0034817035510825997, "kl": 0.27880859375, "learning_rate": 1.4403015407969326e-07, "loss": 0.0002785554388538003, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5064, "train_speed(iter/s)": 0.027148 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.25, "completions/mean_length": 93.59375190734863, "completions/min_length": 37.0, "epoch": 7.5480268056589725, "grad_norm": 0.003114716337521765, "kl": 0.255859375, "learning_rate": 1.4386411787736485e-07, "loss": 0.00025587010895833373, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5065, "train_speed(iter/s)": 0.02715 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.5, "completions/mean_length": 87.06250190734863, "completions/min_length": 34.5, "epoch": 7.549516008935219, "grad_norm": 0.003244389108846612, "kl": 0.28759765625, "learning_rate": 1.436981613492394e-07, "loss": 0.0002868958399631083, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5066, "train_speed(iter/s)": 0.027151 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.75, "completions/mean_length": 90.52083587646484, "completions/min_length": 38.5, "epoch": 7.551005212211467, "grad_norm": 2.177379967996145, "kl": 0.30322265625, "learning_rate": 1.4353228453244432e-07, "loss": -0.001284886384382844, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833507180214, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5067, "train_speed(iter/s)": 0.027152 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 250.75, "completions/mean_length": 97.44791984558105, "completions/min_length": 40.75, "epoch": 7.552494415487714, "grad_norm": 1.429266611874955, "kl": 0.3369140625, "learning_rate": 1.4336648746408962e-07, "loss": 0.006822110619395971, "memory(GiB)": 112.53, "reward": 1.6458334028720856, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.645833358168602, "rewards/CineAccuracyORM/std": 0.4870600998401642, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5068, "train_speed(iter/s)": 0.027149 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 201.5, "completions/mean_length": 82.57291984558105, "completions/min_length": 37.75, "epoch": 7.5539836187639615, "grad_norm": 0.0037553007130990703, "kl": 0.2939453125, "learning_rate": 1.4320077018126704e-07, "loss": 0.00029342510970309377, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5069, "train_speed(iter/s)": 0.027149 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.75, "completions/mean_length": 87.09375381469727, "completions/min_length": 43.75, "epoch": 7.555472822040208, "grad_norm": 1.546293341839934, "kl": 0.28369140625, "learning_rate": 1.4303513272105055e-07, "loss": 0.005203502252697945, "memory(GiB)": 112.53, "reward": 1.8645833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8645833432674408, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5070, "train_speed(iter/s)": 0.027148 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 220.0, "completions/mean_length": 89.51041984558105, "completions/min_length": 41.0, "epoch": 7.556962025316456, "grad_norm": 0.0036521855870301394, "kl": 0.2822265625, "learning_rate": 1.4286957512049642e-07, "loss": 0.00028245305293239653, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5071, "train_speed(iter/s)": 0.027148 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.0, "completions/mean_length": 91.06250190734863, "completions/min_length": 38.75, "epoch": 7.558451228592703, "grad_norm": 0.003842783722650651, "kl": 0.29833984375, "learning_rate": 1.4270409741664268e-07, "loss": 0.00029875291511416435, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5072, "train_speed(iter/s)": 0.027149 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 251.25, "completions/mean_length": 98.26041793823242, "completions/min_length": 36.25, "epoch": 7.5599404318689505, "grad_norm": 0.0034255660114103008, "kl": 0.29638671875, "learning_rate": 1.4253869964651023e-07, "loss": 0.0002958730619866401, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5073, "train_speed(iter/s)": 0.027146 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 172.25, "completions/mean_length": 89.78125190734863, "completions/min_length": 37.25, "epoch": 7.561429635145197, "grad_norm": 0.0035178950817444546, "kl": 0.29150390625, "learning_rate": 1.423733818471013e-07, "loss": 0.000291865726467222, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5074, "train_speed(iter/s)": 0.027144 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.5, "completions/mean_length": 94.58333587646484, "completions/min_length": 43.25, "epoch": 7.562918838421444, "grad_norm": 0.007358024083227555, "kl": 0.29443359375, "learning_rate": 1.4220814405540066e-07, "loss": 0.0002941730199381709, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5075, "train_speed(iter/s)": 0.027141 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.25, "completions/mean_length": 87.28125190734863, "completions/min_length": 35.75, "epoch": 7.564408041697692, "grad_norm": 0.003936318678257956, "kl": 0.29736328125, "learning_rate": 1.4204298630837502e-07, "loss": 0.000297400401905179, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5076, "train_speed(iter/s)": 0.027142 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.5, "completions/mean_length": 96.40625190734863, "completions/min_length": 39.0, "epoch": 7.565897244973939, "grad_norm": 0.0031081232856074122, "kl": 0.26708984375, "learning_rate": 1.4187790864297327e-07, "loss": 0.0002668622473720461, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5077, "train_speed(iter/s)": 0.027144 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 185.0, "completions/mean_length": 85.19791984558105, "completions/min_length": 38.5, "epoch": 7.567386448250186, "grad_norm": 0.004118504992506515, "kl": 0.3056640625, "learning_rate": 1.4171291109612616e-07, "loss": 0.00030556932324543595, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5078, "train_speed(iter/s)": 0.027142 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 204.75, "completions/mean_length": 85.83333778381348, "completions/min_length": 39.75, "epoch": 7.568875651526433, "grad_norm": 1.2166649954796702, "kl": 0.296875, "learning_rate": 1.4154799370474702e-07, "loss": 0.013815384358167648, "memory(GiB)": 112.53, "reward": 1.9791666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.9791666716337204, "rewards/CineAccuracyORM/std": 0.07058246433734894, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5079, "train_speed(iter/s)": 0.027143 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 225.25, "completions/mean_length": 93.20833587646484, "completions/min_length": 41.5, "epoch": 7.570364854802681, "grad_norm": 0.003013412744097792, "kl": 0.296875, "learning_rate": 1.4138315650573074e-07, "loss": 0.0002967818290926516, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5080, "train_speed(iter/s)": 0.027144 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.25, "completions/mean_length": 80.44791793823242, "completions/min_length": 32.75, "epoch": 7.5718540580789275, "grad_norm": 0.005078494050002037, "kl": 0.3095703125, "learning_rate": 1.4121839953595439e-07, "loss": 0.00030869204783812165, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5081, "train_speed(iter/s)": 0.027142 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.25, "completions/mean_length": 85.53125381469727, "completions/min_length": 38.5, "epoch": 7.573343261355175, "grad_norm": 0.0038822453735099676, "kl": 0.287109375, "learning_rate": 1.4105372283227756e-07, "loss": 0.00028674909844994545, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5082, "train_speed(iter/s)": 0.027142 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.0, "completions/mean_length": 78.78125190734863, "completions/min_length": 27.75, "epoch": 7.574832464631422, "grad_norm": 0.003352794464199188, "kl": 0.30078125, "learning_rate": 1.4088912643154083e-07, "loss": 0.00030051666544750333, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5083, "train_speed(iter/s)": 0.027144 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.0, "completions/mean_length": 82.59375381469727, "completions/min_length": 38.5, "epoch": 7.57632166790767, "grad_norm": 0.003566967042332435, "kl": 0.314453125, "learning_rate": 1.4072461037056805e-07, "loss": 0.00031422291067428887, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5084, "train_speed(iter/s)": 0.027145 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.25, "completions/mean_length": 94.13541793823242, "completions/min_length": 41.75, "epoch": 7.5778108711839165, "grad_norm": 0.003281699919712385, "kl": 0.291015625, "learning_rate": 1.4056017468616422e-07, "loss": 0.0002907415619120002, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5085, "train_speed(iter/s)": 0.027144 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.25, "completions/mean_length": 98.18750381469727, "completions/min_length": 40.0, "epoch": 7.579300074460164, "grad_norm": 2.037037506548043, "kl": 0.287109375, "learning_rate": 1.4039581941511665e-07, "loss": 0.03348264470696449, "memory(GiB)": 112.53, "reward": 1.8437500298023224, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.17693356797099113, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5086, "train_speed(iter/s)": 0.027143 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 185.5, "completions/mean_length": 87.13541984558105, "completions/min_length": 33.25, "epoch": 7.580789277736411, "grad_norm": 0.004594219327753853, "kl": 0.28759765625, "learning_rate": 1.4023154459419496e-07, "loss": 0.000287753704469651, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5087, "train_speed(iter/s)": 0.027143 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 199.75, "completions/mean_length": 89.25000190734863, "completions/min_length": 39.25, "epoch": 7.582278481012658, "grad_norm": 0.0031104630955938774, "kl": 0.2890625, "learning_rate": 1.4006735026014998e-07, "loss": 0.00028971213032491505, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5088, "train_speed(iter/s)": 0.027144 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 186.25, "completions/mean_length": 86.57291984558105, "completions/min_length": 40.5, "epoch": 7.5837676842889055, "grad_norm": 0.003556119285527238, "kl": 0.29833984375, "learning_rate": 1.399032364497156e-07, "loss": 0.00029885751428082585, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5089, "train_speed(iter/s)": 0.027142 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.25, "completions/mean_length": 82.94791984558105, "completions/min_length": 35.5, "epoch": 7.585256887565152, "grad_norm": 0.003306315076339211, "kl": 0.31201171875, "learning_rate": 1.3973920319960652e-07, "loss": 0.0003117667802143842, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5090, "train_speed(iter/s)": 0.027142 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.25, "completions/mean_length": 95.36458396911621, "completions/min_length": 45.75, "epoch": 7.5867460908414, "grad_norm": 0.0031961228781025165, "kl": 0.279296875, "learning_rate": 1.3957525054652047e-07, "loss": 0.00027940055588260293, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5091, "train_speed(iter/s)": 0.027142 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.5, "completions/mean_length": 93.52083587646484, "completions/min_length": 37.75, "epoch": 7.588235294117647, "grad_norm": 0.0031949829745215566, "kl": 0.27294921875, "learning_rate": 1.3941137852713653e-07, "loss": 0.00027230550767853856, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5092, "train_speed(iter/s)": 0.027141 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.25, "completions/mean_length": 85.92708587646484, "completions/min_length": 40.75, "epoch": 7.589724497393894, "grad_norm": 0.004149959222462355, "kl": 0.3037109375, "learning_rate": 1.392475871781158e-07, "loss": 0.00030396878719329834, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5093, "train_speed(iter/s)": 0.027142 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 223.25, "completions/mean_length": 88.52083587646484, "completions/min_length": 37.75, "epoch": 7.591213700670141, "grad_norm": 0.003283710596999221, "kl": 0.271240234375, "learning_rate": 1.3908387653610187e-07, "loss": 0.0002711862907744944, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5094, "train_speed(iter/s)": 0.027143 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.5, "completions/mean_length": 96.47916984558105, "completions/min_length": 44.0, "epoch": 7.592702903946389, "grad_norm": 0.00319231722847953, "kl": 0.288330078125, "learning_rate": 1.389202466377193e-07, "loss": 0.00028823615866713226, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5095, "train_speed(iter/s)": 0.027143 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 157.75, "completions/mean_length": 83.25000190734863, "completions/min_length": 43.75, "epoch": 7.594192107222636, "grad_norm": 0.003269552166788282, "kl": 0.29248046875, "learning_rate": 1.3875669751957548e-07, "loss": 0.00029219838324934244, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5096, "train_speed(iter/s)": 0.027144 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.0, "completions/mean_length": 83.37500190734863, "completions/min_length": 36.25, "epoch": 7.595681310498883, "grad_norm": 0.0029692365446255093, "kl": 0.290771484375, "learning_rate": 1.3859322921825928e-07, "loss": 0.000291069271042943, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5097, "train_speed(iter/s)": 0.027146 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 214.25, "completions/mean_length": 86.78125190734863, "completions/min_length": 35.25, "epoch": 7.59717051377513, "grad_norm": 0.003527234542118342, "kl": 0.31005859375, "learning_rate": 1.3842984177034145e-07, "loss": 0.00031009112717583776, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5098, "train_speed(iter/s)": 0.027145 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.5, "completions/mean_length": 82.54166984558105, "completions/min_length": 34.75, "epoch": 7.598659717051378, "grad_norm": 0.003584134215493693, "kl": 0.32275390625, "learning_rate": 1.3826653521237526e-07, "loss": 0.0003226190456189215, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5099, "train_speed(iter/s)": 0.027144 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 234.0, "completions/mean_length": 83.92708587646484, "completions/min_length": 34.5, "epoch": 7.600148920327625, "grad_norm": 0.027882139733180013, "kl": 0.31689453125, "learning_rate": 1.3810330958089478e-07, "loss": 0.0003172466531395912, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5100, "train_speed(iter/s)": 0.027143 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 241.25, "completions/mean_length": 93.57291984558105, "completions/min_length": 36.75, "epoch": 7.6016381236038715, "grad_norm": 0.003449344032994666, "kl": 0.28955078125, "learning_rate": 1.379401649124171e-07, "loss": 0.0002894336939789355, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5101, "train_speed(iter/s)": 0.027143 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 232.75, "completions/mean_length": 91.85416793823242, "completions/min_length": 43.75, "epoch": 7.603127326880119, "grad_norm": 0.002788089811780492, "kl": 0.2685546875, "learning_rate": 1.3777710124344055e-07, "loss": 0.0002684405189938843, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5102, "train_speed(iter/s)": 0.027142 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 196.25, "completions/mean_length": 88.05208587646484, "completions/min_length": 42.0, "epoch": 7.604616530156366, "grad_norm": 1.3816060761544868, "kl": 0.273681640625, "learning_rate": 1.3761411861044548e-07, "loss": 0.0025749527849256992, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5103, "train_speed(iter/s)": 0.027142 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 246.5, "completions/mean_length": 102.00000190734863, "completions/min_length": 37.75, "epoch": 7.606105733432614, "grad_norm": 0.0029178203005025237, "kl": 0.264404296875, "learning_rate": 1.374512170498941e-07, "loss": 0.0002646560315042734, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5104, "train_speed(iter/s)": 0.02714 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.0, "completions/mean_length": 90.40625190734863, "completions/min_length": 38.0, "epoch": 7.6075949367088604, "grad_norm": 0.0031556570470170015, "kl": 0.275390625, "learning_rate": 1.3728839659823045e-07, "loss": 0.00027545771445147693, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5105, "train_speed(iter/s)": 0.027139 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.75, "completions/mean_length": 97.39583778381348, "completions/min_length": 40.5, "epoch": 7.609084139985108, "grad_norm": 0.003354953841906261, "kl": 0.28125, "learning_rate": 1.3712565729188062e-07, "loss": 0.0002817485947161913, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5106, "train_speed(iter/s)": 0.027138 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 175.0, "completions/mean_length": 86.14583587646484, "completions/min_length": 45.5, "epoch": 7.610573343261355, "grad_norm": 1.3565134288386658, "kl": 0.29541015625, "learning_rate": 1.369629991672525e-07, "loss": -0.003166424110531807, "memory(GiB)": 112.53, "reward": 1.6041666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6041666697710752, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5107, "train_speed(iter/s)": 0.02714 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.5, "completions/mean_length": 101.18750381469727, "completions/min_length": 39.0, "epoch": 7.612062546537603, "grad_norm": 2.4141217885243225, "kl": 0.2587890625, "learning_rate": 1.368004222607355e-07, "loss": -0.012626441195607185, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.08311938121914864, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.3186568021774292, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5108, "train_speed(iter/s)": 0.027141 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.0, "completions/mean_length": 85.83333778381348, "completions/min_length": 33.5, "epoch": 7.613551749813849, "grad_norm": 0.0032923153961021824, "kl": 0.296875, "learning_rate": 1.3663792660870128e-07, "loss": 0.00029728206573054194, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5109, "train_speed(iter/s)": 0.027143 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.0, "completions/mean_length": 86.85416793823242, "completions/min_length": 31.0, "epoch": 7.615040953090097, "grad_norm": 0.0036303252072076636, "kl": 0.31005859375, "learning_rate": 1.364755122475028e-07, "loss": 0.0003099989262409508, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5110, "train_speed(iter/s)": 0.027144 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.25, "completions/mean_length": 89.57291793823242, "completions/min_length": 41.5, "epoch": 7.616530156366344, "grad_norm": 0.0034351870760662775, "kl": 0.27783203125, "learning_rate": 1.3631317921347562e-07, "loss": 0.00027739768847823143, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5111, "train_speed(iter/s)": 0.027144 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.25, "completions/mean_length": 86.57291793823242, "completions/min_length": 46.75, "epoch": 7.618019359642592, "grad_norm": 0.0034557801086738935, "kl": 0.28271484375, "learning_rate": 1.3615092754293643e-07, "loss": 0.0002825126575771719, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5112, "train_speed(iter/s)": 0.027142 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 231.75, "completions/mean_length": 92.36458587646484, "completions/min_length": 41.25, "epoch": 7.619508562918838, "grad_norm": 0.003117273454531988, "kl": 0.27734375, "learning_rate": 1.3598875727218396e-07, "loss": 0.0002774120366666466, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5113, "train_speed(iter/s)": 0.027142 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 195.75, "completions/mean_length": 89.52083396911621, "completions/min_length": 43.25, "epoch": 7.620997766195085, "grad_norm": 0.003613039482816647, "kl": 0.289306640625, "learning_rate": 1.358266684374987e-07, "loss": 0.00028899434255436063, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5114, "train_speed(iter/s)": 0.027142 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 307.25, "completions/mean_length": 97.21875190734863, "completions/min_length": 39.75, "epoch": 7.622486969471333, "grad_norm": 0.0030993973820354426, "kl": 0.28515625, "learning_rate": 1.3566466107514284e-07, "loss": 0.00028509245021268725, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5115, "train_speed(iter/s)": 0.027142 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 151.0, "completions/mean_length": 78.17708587646484, "completions/min_length": 34.25, "epoch": 7.62397617274758, "grad_norm": 2.16144289311537, "kl": 0.3076171875, "learning_rate": 1.3550273522136048e-07, "loss": 0.0017855990445241332, "memory(GiB)": 112.53, "reward": 1.5312500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.5312500149011612, "rewards/CineAccuracyORM/std": 0.464870348572731, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5116, "train_speed(iter/s)": 0.027143 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 206.0, "completions/mean_length": 90.14583587646484, "completions/min_length": 37.25, "epoch": 7.625465376023827, "grad_norm": 0.0033133124104906216, "kl": 0.2939453125, "learning_rate": 1.3534089091237756e-07, "loss": 0.00029307790100574493, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5117, "train_speed(iter/s)": 0.027145 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 254.25, "completions/mean_length": 97.89583396911621, "completions/min_length": 36.75, "epoch": 7.626954579300074, "grad_norm": 0.0030086645573178714, "kl": 0.2744140625, "learning_rate": 1.3517912818440152e-07, "loss": 0.0002741107309702784, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5118, "train_speed(iter/s)": 0.027145 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 191.5, "completions/mean_length": 85.17708587646484, "completions/min_length": 48.0, "epoch": 7.628443782576322, "grad_norm": 1.307741240649006, "kl": 0.302001953125, "learning_rate": 1.3501744707362178e-07, "loss": -0.003011713270097971, "memory(GiB)": 112.53, "reward": 1.791666716337204, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7916666865348816, "rewards/CineAccuracyORM/std": 0.33594508469104767, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5119, "train_speed(iter/s)": 0.027147 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.0, "completions/mean_length": 87.18750381469727, "completions/min_length": 37.0, "epoch": 7.629932985852569, "grad_norm": 0.04878316916184117, "kl": 0.34326171875, "learning_rate": 1.3485584761620938e-07, "loss": 0.00034326533204875886, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5120, "train_speed(iter/s)": 0.027146 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.25, "completions/mean_length": 81.11458778381348, "completions/min_length": 37.75, "epoch": 7.631422189128816, "grad_norm": 3.659744649925109, "kl": 0.275390625, "learning_rate": 1.3469432984831692e-07, "loss": -0.002115103881806135, "memory(GiB)": 112.53, "reward": 1.8750000298023224, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.8750000149011612, "rewards/CineAccuracyORM/std": 0.2155592292547226, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5121, "train_speed(iter/s)": 0.027145 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.75, "completions/mean_length": 90.75000381469727, "completions/min_length": 36.0, "epoch": 7.632911392405063, "grad_norm": 0.0031922736117467074, "kl": 0.30224609375, "learning_rate": 1.3453289380607918e-07, "loss": 0.0003016510745510459, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5122, "train_speed(iter/s)": 0.027146 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.5, "completions/mean_length": 92.37500190734863, "completions/min_length": 44.5, "epoch": 7.634400595681311, "grad_norm": 1.2214914482157921, "kl": 0.303955078125, "learning_rate": 1.3437153952561237e-07, "loss": -0.019800003618001938, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833386108279, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5123, "train_speed(iter/s)": 0.027143 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.75, "completions/mean_length": 91.70833396911621, "completions/min_length": 49.0, "epoch": 7.635889798957558, "grad_norm": 0.0034649935505408277, "kl": 0.27734375, "learning_rate": 1.3421026704301437e-07, "loss": 0.00027736631454899907, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5124, "train_speed(iter/s)": 0.027143 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.75, "completions/mean_length": 99.84375190734863, "completions/min_length": 44.5, "epoch": 7.637379002233805, "grad_norm": 0.009528055906485616, "kl": 0.27783203125, "learning_rate": 1.340490763943648e-07, "loss": 0.00027700827922672033, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5125, "train_speed(iter/s)": 0.02714 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.75, "completions/mean_length": 89.36458587646484, "completions/min_length": 37.5, "epoch": 7.638868205510052, "grad_norm": 0.003016960753601426, "kl": 0.28466796875, "learning_rate": 1.338879676157249e-07, "loss": 0.0002844432310666889, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5126, "train_speed(iter/s)": 0.027142 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.75, "completions/mean_length": 87.08333587646484, "completions/min_length": 38.0, "epoch": 7.640357408786299, "grad_norm": 0.0034108392521687947, "kl": 0.298828125, "learning_rate": 1.3372694074313812e-07, "loss": 0.00029787543462589383, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5127, "train_speed(iter/s)": 0.027141 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.75, "completions/mean_length": 90.45833587646484, "completions/min_length": 37.0, "epoch": 7.641846612062547, "grad_norm": 0.0036512724456403793, "kl": 0.29248046875, "learning_rate": 1.3356599581262856e-07, "loss": 0.00029257099959068, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5128, "train_speed(iter/s)": 0.02714 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 266.25, "completions/mean_length": 98.08333587646484, "completions/min_length": 43.0, "epoch": 7.643335815338793, "grad_norm": 0.003154537206370737, "kl": 0.291015625, "learning_rate": 1.3340513286020306e-07, "loss": 0.0002907759044319391, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5129, "train_speed(iter/s)": 0.027141 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.0, "completions/mean_length": 82.68750190734863, "completions/min_length": 38.25, "epoch": 7.644825018615041, "grad_norm": 0.0031603611356538744, "kl": 0.29296875, "learning_rate": 1.332443519218494e-07, "loss": 0.0002933531941380352, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5130, "train_speed(iter/s)": 0.027143 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 266.5, "completions/mean_length": 92.56250381469727, "completions/min_length": 41.5, "epoch": 7.646314221891288, "grad_norm": 0.6132141361743585, "kl": 0.2880859375, "learning_rate": 1.3308365303353725e-07, "loss": -0.0163213312625885, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5131, "train_speed(iter/s)": 0.027142 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.25, "completions/mean_length": 85.03125190734863, "completions/min_length": 36.25, "epoch": 7.6478034251675355, "grad_norm": 1.9775852221002317, "kl": 0.29833984375, "learning_rate": 1.3292303623121827e-07, "loss": 0.0024789273738861084, "memory(GiB)": 112.53, "reward": 1.9583333432674408, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.9583333432674408, "rewards/CineAccuracyORM/std": 0.09517337381839752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5132, "train_speed(iter/s)": 0.027141 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.75, "completions/mean_length": 83.69791984558105, "completions/min_length": 32.75, "epoch": 7.649292628443782, "grad_norm": 0.1325793181909687, "kl": 0.3369140625, "learning_rate": 1.3276250155082486e-07, "loss": 0.00033679086482152343, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5133, "train_speed(iter/s)": 0.027141 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 179.75, "completions/mean_length": 87.34375190734863, "completions/min_length": 45.25, "epoch": 7.65078183172003, "grad_norm": 0.11094388768612695, "kl": 0.41748046875, "learning_rate": 1.3260204902827206e-07, "loss": 0.000418405223172158, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5134, "train_speed(iter/s)": 0.027143 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 256.25, "completions/mean_length": 97.96875190734863, "completions/min_length": 43.25, "epoch": 7.652271034996277, "grad_norm": 0.0032498907004221336, "kl": 0.294921875, "learning_rate": 1.324416786994559e-07, "loss": 0.0002946083841379732, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5135, "train_speed(iter/s)": 0.027141 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 229.0, "completions/mean_length": 96.26041984558105, "completions/min_length": 38.5, "epoch": 7.6537602382725245, "grad_norm": 1.5573102178940195, "kl": 0.258544921875, "learning_rate": 1.3228139060025406e-07, "loss": 0.0174461230635643, "memory(GiB)": 112.53, "reward": 1.8958333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5136, "train_speed(iter/s)": 0.027141 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.25, "completions/mean_length": 98.32291793823242, "completions/min_length": 41.0, "epoch": 7.655249441548771, "grad_norm": 1.339288419719505, "kl": 0.264892578125, "learning_rate": 1.3212118476652644e-07, "loss": 0.014600299298763275, "memory(GiB)": 112.53, "reward": 1.8958333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5137, "train_speed(iter/s)": 0.02714 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 256.5, "completions/mean_length": 104.91666793823242, "completions/min_length": 47.0, "epoch": 7.656738644825019, "grad_norm": 1.12105726265172, "kl": 0.28125, "learning_rate": 1.3196106123411343e-07, "loss": 0.022000066936016083, "memory(GiB)": 112.53, "reward": 1.6562500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500074505806, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5138, "train_speed(iter/s)": 0.027136 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 181.0, "completions/mean_length": 83.05208587646484, "completions/min_length": 34.5, "epoch": 7.658227848101266, "grad_norm": 0.0031899120192501653, "kl": 0.31640625, "learning_rate": 1.3180102003883815e-07, "loss": 0.0003163740038871765, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5139, "train_speed(iter/s)": 0.027137 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.5, "completions/mean_length": 91.17708587646484, "completions/min_length": 45.25, "epoch": 7.659717051377513, "grad_norm": 0.0664732654996741, "kl": 0.31591796875, "learning_rate": 1.3164106121650447e-07, "loss": 0.0003164300578646362, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5140, "train_speed(iter/s)": 0.027135 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 236.25, "completions/mean_length": 100.09375381469727, "completions/min_length": 44.0, "epoch": 7.66120625465376, "grad_norm": 0.003102229665580787, "kl": 0.25927734375, "learning_rate": 1.3148118480289834e-07, "loss": 0.0002595627447590232, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5141, "train_speed(iter/s)": 0.027134 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.25, "completions/mean_length": 89.05208587646484, "completions/min_length": 33.75, "epoch": 7.662695457930007, "grad_norm": 0.0038120751967251534, "kl": 0.298095703125, "learning_rate": 1.3132139083378691e-07, "loss": 0.000297973194392398, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5142, "train_speed(iter/s)": 0.027131 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.25, "completions/mean_length": 89.86458778381348, "completions/min_length": 42.25, "epoch": 7.664184661206255, "grad_norm": 0.016927178741449483, "kl": 0.31494140625, "learning_rate": 1.3116167934491906e-07, "loss": 0.00031464104540646076, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5143, "train_speed(iter/s)": 0.027132 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 223.75, "completions/mean_length": 98.39583587646484, "completions/min_length": 47.25, "epoch": 7.6656738644825015, "grad_norm": 0.006338797219993345, "kl": 0.29541015625, "learning_rate": 1.310020503720254e-07, "loss": 0.0002949381014332175, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5144, "train_speed(iter/s)": 0.027131 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.75, "completions/mean_length": 90.12500381469727, "completions/min_length": 43.5, "epoch": 7.667163067758749, "grad_norm": 0.003171006398375192, "kl": 0.28662109375, "learning_rate": 1.3084250395081775e-07, "loss": 0.0002860654494725168, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5145, "train_speed(iter/s)": 0.027131 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 191.5, "completions/mean_length": 93.09375190734863, "completions/min_length": 40.5, "epoch": 7.668652271034996, "grad_norm": 0.003548906539381047, "kl": 0.28271484375, "learning_rate": 1.3068304011698967e-07, "loss": 0.0002829779405146837, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5146, "train_speed(iter/s)": 0.027132 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 236.0, "completions/mean_length": 93.47916984558105, "completions/min_length": 42.25, "epoch": 7.670141474311244, "grad_norm": 0.0034227203247456143, "kl": 0.259765625, "learning_rate": 1.3052365890621613e-07, "loss": 0.0002596320991870016, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5147, "train_speed(iter/s)": 0.027131 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 256.25, "completions/mean_length": 89.375, "completions/min_length": 40.0, "epoch": 7.6716306775874905, "grad_norm": 0.0032587116085214685, "kl": 0.2822265625, "learning_rate": 1.303643603541535e-07, "loss": 0.00028234158526174724, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5148, "train_speed(iter/s)": 0.027131 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 194.25, "completions/mean_length": 84.47917175292969, "completions/min_length": 40.25, "epoch": 7.673119880863738, "grad_norm": 0.003695434217096597, "kl": 0.302734375, "learning_rate": 1.3020514449644015e-07, "loss": 0.0003021984884981066, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5149, "train_speed(iter/s)": 0.027132 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.25, "completions/mean_length": 83.83333587646484, "completions/min_length": 35.5, "epoch": 7.674609084139985, "grad_norm": 2.3126558891172144, "kl": 0.3203125, "learning_rate": 1.3004601136869553e-07, "loss": 0.005121786147356033, "memory(GiB)": 112.53, "reward": 1.8645833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8645833432674408, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5150, "train_speed(iter/s)": 0.027131 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.0, "completions/mean_length": 98.34375190734863, "completions/min_length": 44.25, "epoch": 7.676098287416233, "grad_norm": 0.0032896115483056288, "kl": 0.27734375, "learning_rate": 1.2988696100652053e-07, "loss": 0.0002773258602246642, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5151, "train_speed(iter/s)": 0.027131 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 287.0, "completions/mean_length": 98.48958587646484, "completions/min_length": 38.0, "epoch": 7.6775874906924795, "grad_norm": 0.003825342127582076, "kl": 0.285400390625, "learning_rate": 1.2972799344549778e-07, "loss": 0.00028501226915977895, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5152, "train_speed(iter/s)": 0.02713 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 229.0, "completions/mean_length": 94.02083396911621, "completions/min_length": 38.75, "epoch": 7.679076693968726, "grad_norm": 0.003267537581952098, "kl": 0.28271484375, "learning_rate": 1.295691087211912e-07, "loss": 0.00028213317273184657, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5153, "train_speed(iter/s)": 0.02713 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.25, "completions/mean_length": 88.70833396911621, "completions/min_length": 34.0, "epoch": 7.680565897244974, "grad_norm": 0.0035855429741991603, "kl": 0.2978515625, "learning_rate": 1.2941030686914616e-07, "loss": 0.00029722164617851377, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5154, "train_speed(iter/s)": 0.027128 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.0, "completions/mean_length": 91.65625, "completions/min_length": 43.75, "epoch": 7.682055100521221, "grad_norm": 0.0030675852411389446, "kl": 0.2861328125, "learning_rate": 1.292515879248899e-07, "loss": 0.0002862774417735636, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5155, "train_speed(iter/s)": 0.027128 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.5, "completions/mean_length": 99.26042175292969, "completions/min_length": 37.25, "epoch": 7.6835443037974684, "grad_norm": 0.003339807465395746, "kl": 0.2724609375, "learning_rate": 1.2909295192393055e-07, "loss": 0.00027266101096756756, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5156, "train_speed(iter/s)": 0.027128 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.25, "completions/mean_length": 97.86458778381348, "completions/min_length": 48.25, "epoch": 7.685033507073715, "grad_norm": 1.4945914252525287, "kl": 0.253662109375, "learning_rate": 1.2893439890175794e-07, "loss": 0.013653441332280636, "memory(GiB)": 112.53, "reward": 1.854166716337204, "reward_std": 0.08311938121914864, "rewards/CineAccuracyORM/mean": 0.8541666865348816, "rewards/CineAccuracyORM/std": 0.28614169359207153, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5157, "train_speed(iter/s)": 0.02713 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 179.75, "completions/mean_length": 84.75000381469727, "completions/min_length": 35.75, "epoch": 7.686522710349963, "grad_norm": 0.00299256920932098, "kl": 0.289794921875, "learning_rate": 1.2877592889384342e-07, "loss": 0.0002897943777497858, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5158, "train_speed(iter/s)": 0.027129 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.5, "completions/mean_length": 90.18750381469727, "completions/min_length": 38.75, "epoch": 7.68801191362621, "grad_norm": 0.0035018001294600427, "kl": 0.2998046875, "learning_rate": 1.2861754193563945e-07, "loss": 0.00029998281388543546, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5159, "train_speed(iter/s)": 0.027131 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.75, "completions/mean_length": 99.40625190734863, "completions/min_length": 40.5, "epoch": 7.689501116902457, "grad_norm": 0.002610880297953872, "kl": 0.2666015625, "learning_rate": 1.284592380625804e-07, "loss": 0.0002664898638613522, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5160, "train_speed(iter/s)": 0.02713 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.75, "completions/mean_length": 90.37500190734863, "completions/min_length": 42.25, "epoch": 7.690990320178704, "grad_norm": 0.003248657957093907, "kl": 0.2978515625, "learning_rate": 1.283010173100817e-07, "loss": 0.0002977395779453218, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5161, "train_speed(iter/s)": 0.027129 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.5, "completions/mean_length": 95.72916793823242, "completions/min_length": 46.25, "epoch": 7.692479523454952, "grad_norm": 0.003070812507122307, "kl": 0.27294921875, "learning_rate": 1.281428797135402e-07, "loss": 0.00027329204021953046, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5162, "train_speed(iter/s)": 0.027129 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 195.0, "completions/mean_length": 92.46875190734863, "completions/min_length": 35.25, "epoch": 7.693968726731199, "grad_norm": 0.0031917236791162125, "kl": 0.28759765625, "learning_rate": 1.2798482530833433e-07, "loss": 0.0002876642975024879, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5163, "train_speed(iter/s)": 0.027128 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 260.25, "completions/mean_length": 105.84375190734863, "completions/min_length": 41.75, "epoch": 7.695457930007446, "grad_norm": 1.7133725530531339, "kl": 0.257080078125, "learning_rate": 1.2782685412982347e-07, "loss": 0.02863353304564953, "memory(GiB)": 112.53, "reward": 1.697916716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.697916679084301, "rewards/CineAccuracyORM/std": 0.34448449313640594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5164, "train_speed(iter/s)": 0.027127 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.25, "completions/mean_length": 83.77083778381348, "completions/min_length": 37.5, "epoch": 7.696947133283693, "grad_norm": 0.029706884612301795, "kl": 0.3193359375, "learning_rate": 1.2766896621334927e-07, "loss": 0.0003196493489667773, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5165, "train_speed(iter/s)": 0.027124 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.25, "completions/mean_length": 79.42708396911621, "completions/min_length": 40.75, "epoch": 7.69843633655994, "grad_norm": 0.0032236595520761606, "kl": 0.292236328125, "learning_rate": 1.275111615942336e-07, "loss": 0.00029213755624368787, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5166, "train_speed(iter/s)": 0.027125 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.25, "completions/mean_length": 83.84375190734863, "completions/min_length": 37.75, "epoch": 7.699925539836188, "grad_norm": 0.0032032385757980513, "kl": 0.2958984375, "learning_rate": 1.2735344030778067e-07, "loss": 0.0002960015262942761, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5167, "train_speed(iter/s)": 0.027125 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 185.75, "completions/mean_length": 82.54166793823242, "completions/min_length": 32.75, "epoch": 7.701414743112435, "grad_norm": 0.004089289621646082, "kl": 0.2958984375, "learning_rate": 1.2719580238927553e-07, "loss": 0.00029570417245849967, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5168, "train_speed(iter/s)": 0.027125 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.0, "completions/mean_length": 92.69791984558105, "completions/min_length": 37.0, "epoch": 7.702903946388682, "grad_norm": 0.0029997888154326712, "kl": 0.28857421875, "learning_rate": 1.2703824787398453e-07, "loss": 0.00028805440524592996, "memory(GiB)": 112.53, "reward": 1.416666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.416666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5169, "train_speed(iter/s)": 0.027125 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.25, "completions/mean_length": 96.56250190734863, "completions/min_length": 38.75, "epoch": 7.704393149664929, "grad_norm": 0.0027123551039127977, "kl": 0.26318359375, "learning_rate": 1.268807767971561e-07, "loss": 0.0002626906498335302, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5170, "train_speed(iter/s)": 0.027126 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.25, "completions/mean_length": 95.32291793823242, "completions/min_length": 41.75, "epoch": 7.705882352941177, "grad_norm": 0.003489217473952397, "kl": 0.287841796875, "learning_rate": 1.2672338919401864e-07, "loss": 0.00028742203721776605, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5171, "train_speed(iter/s)": 0.027126 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.5, "completions/mean_length": 93.40625381469727, "completions/min_length": 46.5, "epoch": 7.707371556217423, "grad_norm": 0.0031221273226283716, "kl": 0.29345703125, "learning_rate": 1.2656608509978333e-07, "loss": 0.00029349327087402344, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5172, "train_speed(iter/s)": 0.027124 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.5, "completions/mean_length": 89.29166984558105, "completions/min_length": 37.75, "epoch": 7.708860759493671, "grad_norm": 0.0037527442478018577, "kl": 0.3095703125, "learning_rate": 1.2640886454964173e-07, "loss": 0.0003093392588198185, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5173, "train_speed(iter/s)": 0.027124 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 233.75, "completions/mean_length": 89.45833587646484, "completions/min_length": 45.75, "epoch": 7.710349962769918, "grad_norm": 0.003635312729525058, "kl": 0.29296875, "learning_rate": 1.262517275787669e-07, "loss": 0.000292744894977659, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5174, "train_speed(iter/s)": 0.027123 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.0, "completions/mean_length": 86.12500381469727, "completions/min_length": 39.25, "epoch": 7.711839166046166, "grad_norm": 1.6359493302882007, "kl": 0.2880859375, "learning_rate": 1.2609467422231372e-07, "loss": -0.006814556196331978, "memory(GiB)": 112.53, "reward": 1.7395833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5175, "train_speed(iter/s)": 0.027123 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.0, "completions/mean_length": 87.78125381469727, "completions/min_length": 33.0, "epoch": 7.713328369322412, "grad_norm": 0.0033987909386965693, "kl": 0.28369140625, "learning_rate": 1.2593770451541736e-07, "loss": 0.0002834838523995131, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5176, "train_speed(iter/s)": 0.027122 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.0, "completions/mean_length": 92.39583587646484, "completions/min_length": 34.5, "epoch": 7.71481757259866, "grad_norm": 0.05883370515212629, "kl": 0.28857421875, "learning_rate": 1.2578081849319545e-07, "loss": 0.0002883112174458802, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5177, "train_speed(iter/s)": 0.02712 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 199.5, "completions/mean_length": 90.31250190734863, "completions/min_length": 37.5, "epoch": 7.716306775874907, "grad_norm": 1.589341443411682, "kl": 0.27685546875, "learning_rate": 1.256240161907457e-07, "loss": 0.006803794763982296, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5178, "train_speed(iter/s)": 0.027121 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.75, "completions/mean_length": 83.91666793823242, "completions/min_length": 40.0, "epoch": 7.717795979151154, "grad_norm": 0.0031393544850257184, "kl": 0.3037109375, "learning_rate": 1.254672976431481e-07, "loss": 0.0003042399766854942, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5179, "train_speed(iter/s)": 0.027121 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.25, "completions/mean_length": 83.06250190734863, "completions/min_length": 43.0, "epoch": 7.719285182427401, "grad_norm": 0.003062941652957588, "kl": 0.30712890625, "learning_rate": 1.253106628854635e-07, "loss": 0.00030738930217921734, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5180, "train_speed(iter/s)": 0.027121 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.5, "completions/mean_length": 95.08333587646484, "completions/min_length": 39.5, "epoch": 7.720774385703649, "grad_norm": 0.0030320034757306295, "kl": 0.271484375, "learning_rate": 1.2515411195273362e-07, "loss": 0.0002714476431719959, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5181, "train_speed(iter/s)": 0.027123 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.5, "completions/mean_length": 93.08333587646484, "completions/min_length": 32.5, "epoch": 7.722263588979896, "grad_norm": 0.0026139622775306014, "kl": 0.291015625, "learning_rate": 1.2499764487998242e-07, "loss": 0.0002911720657721162, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5182, "train_speed(iter/s)": 0.027121 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 164.5, "completions/mean_length": 81.33333587646484, "completions/min_length": 38.5, "epoch": 7.723752792256143, "grad_norm": 0.0031424548837180365, "kl": 0.31103515625, "learning_rate": 1.2484126170221386e-07, "loss": 0.00031066822702996433, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5183, "train_speed(iter/s)": 0.027122 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 223.5, "completions/mean_length": 85.29166984558105, "completions/min_length": 39.25, "epoch": 7.72524199553239, "grad_norm": 2.7118215858382535, "kl": 0.29931640625, "learning_rate": 1.2468496245441423e-07, "loss": -0.00917364377528429, "memory(GiB)": 112.53, "reward": 1.8541666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5184, "train_speed(iter/s)": 0.027122 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.5, "completions/mean_length": 82.71875381469727, "completions/min_length": 40.5, "epoch": 7.726731198808637, "grad_norm": 0.0031537067425255827, "kl": 0.28955078125, "learning_rate": 1.2452874717155032e-07, "loss": 0.00028972106520086527, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5185, "train_speed(iter/s)": 0.027123 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.75, "completions/mean_length": 97.09375190734863, "completions/min_length": 33.75, "epoch": 7.728220402084885, "grad_norm": 0.003282866649935314, "kl": 0.2822265625, "learning_rate": 1.2437261588857034e-07, "loss": 0.0002824193798005581, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5186, "train_speed(iter/s)": 0.027122 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.5, "completions/mean_length": 91.86458587646484, "completions/min_length": 39.0, "epoch": 7.729709605361132, "grad_norm": 0.0029945686366974, "kl": 0.291748046875, "learning_rate": 1.2421656864040403e-07, "loss": 0.00029153760988265276, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5187, "train_speed(iter/s)": 0.02712 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.5, "completions/mean_length": 98.70833396911621, "completions/min_length": 42.5, "epoch": 7.731198808637379, "grad_norm": 0.002832640166473255, "kl": 0.26806640625, "learning_rate": 1.2406060546196185e-07, "loss": 0.00026794426958076656, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5188, "train_speed(iter/s)": 0.027121 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 263.25, "completions/mean_length": 100.43750381469727, "completions/min_length": 40.25, "epoch": 7.732688011913626, "grad_norm": 0.00301887234948868, "kl": 0.24609375, "learning_rate": 1.239047263881357e-07, "loss": 0.0002463290293235332, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5189, "train_speed(iter/s)": 0.027121 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.5, "completions/mean_length": 90.69791984558105, "completions/min_length": 44.5, "epoch": 7.734177215189874, "grad_norm": 0.0034283601756038297, "kl": 0.2880859375, "learning_rate": 1.237489314537986e-07, "loss": 0.00028826098423451185, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5190, "train_speed(iter/s)": 0.027119 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.75, "completions/mean_length": 93.88541984558105, "completions/min_length": 39.0, "epoch": 7.735666418466121, "grad_norm": 0.0027219987987809583, "kl": 0.27490234375, "learning_rate": 1.2359322069380478e-07, "loss": 0.000274638005066663, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5191, "train_speed(iter/s)": 0.027119 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.0, "completions/mean_length": 77.87500381469727, "completions/min_length": 33.0, "epoch": 7.737155621742367, "grad_norm": 1.331317652127087, "kl": 0.31787109375, "learning_rate": 1.2343759414298954e-07, "loss": -0.0024042846634984016, "memory(GiB)": 112.53, "reward": 1.5625000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.5625000223517418, "rewards/CineAccuracyORM/std": 0.4870600998401642, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5192, "train_speed(iter/s)": 0.027121 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 264.0, "completions/mean_length": 95.97916793823242, "completions/min_length": 39.75, "epoch": 7.738644825018615, "grad_norm": 0.0031677530348288114, "kl": 0.30859375, "learning_rate": 1.2328205183616962e-07, "loss": 0.00030893069924786687, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5193, "train_speed(iter/s)": 0.027119 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.25, "completions/mean_length": 93.81250190734863, "completions/min_length": 43.0, "epoch": 7.740134028294863, "grad_norm": 0.003153291315351702, "kl": 0.27392578125, "learning_rate": 1.2312659380814261e-07, "loss": 0.00027379393577575684, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5194, "train_speed(iter/s)": 0.027119 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 283.25, "completions/mean_length": 103.23958587646484, "completions/min_length": 39.5, "epoch": 7.7416232315711095, "grad_norm": 0.0027762603294544776, "kl": 0.259521484375, "learning_rate": 1.2297122009368738e-07, "loss": 0.0002593224053271115, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5195, "train_speed(iter/s)": 0.027116 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 243.75, "completions/mean_length": 89.58333587646484, "completions/min_length": 34.5, "epoch": 7.743112434847356, "grad_norm": 2.9549129460735517, "kl": 0.2919921875, "learning_rate": 1.2281593072756386e-07, "loss": -0.03122340515255928, "memory(GiB)": 112.53, "reward": 1.8750000298023224, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.8750000149011612, "rewards/CineAccuracyORM/std": 0.2155592292547226, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5196, "train_speed(iter/s)": 0.027116 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 359.5, "completions/mean_length": 104.33333587646484, "completions/min_length": 47.5, "epoch": 7.744601638123604, "grad_norm": 0.009385010207772564, "kl": 0.26123046875, "learning_rate": 1.2266072574451303e-07, "loss": 0.00026103196432814, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5197, "train_speed(iter/s)": 0.027114 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.75, "completions/mean_length": 87.09375190734863, "completions/min_length": 36.75, "epoch": 7.746090841399851, "grad_norm": 0.0030830878013166554, "kl": 0.272705078125, "learning_rate": 1.2250560517925745e-07, "loss": 0.00027296182815916836, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5198, "train_speed(iter/s)": 0.027115 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 236.25, "completions/mean_length": 103.58333587646484, "completions/min_length": 44.5, "epoch": 7.7475800446760985, "grad_norm": 1.614944344638039, "kl": 0.237060546875, "learning_rate": 1.2235056906650032e-07, "loss": -0.005771105643361807, "memory(GiB)": 112.53, "reward": 1.8645833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8645833432674408, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5199, "train_speed(iter/s)": 0.027116 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 250.25, "completions/mean_length": 89.51041793823242, "completions/min_length": 42.0, "epoch": 7.749069247952345, "grad_norm": 0.0033261604438498105, "kl": 0.30126953125, "learning_rate": 1.2219561744092605e-07, "loss": 0.0003011351509485394, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5200, "train_speed(iter/s)": 0.027117 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 220.5, "completions/mean_length": 89.18750381469727, "completions/min_length": 31.25, "epoch": 7.750558451228593, "grad_norm": 0.0030993706653648146, "kl": 0.29736328125, "learning_rate": 1.2204075033720024e-07, "loss": 0.000296980346320197, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5201, "train_speed(iter/s)": 0.027116 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 237.0, "completions/mean_length": 93.30208778381348, "completions/min_length": 39.0, "epoch": 7.75204765450484, "grad_norm": 0.0030462190654084523, "kl": 0.28564453125, "learning_rate": 1.2188596778996934e-07, "loss": 0.00028549617854878306, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5202, "train_speed(iter/s)": 0.027117 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 241.75, "completions/mean_length": 91.38541984558105, "completions/min_length": 38.5, "epoch": 7.7535368577810875, "grad_norm": 0.0027559313870410117, "kl": 0.27294921875, "learning_rate": 1.2173126983386163e-07, "loss": 0.0002728736726567149, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5203, "train_speed(iter/s)": 0.027118 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.0, "completions/mean_length": 94.55208587646484, "completions/min_length": 38.5, "epoch": 7.755026061057334, "grad_norm": 0.0025802657161634037, "kl": 0.26904296875, "learning_rate": 1.2157665650348515e-07, "loss": 0.00026923997211270034, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5204, "train_speed(iter/s)": 0.027118 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 235.5, "completions/mean_length": 90.50000381469727, "completions/min_length": 28.0, "epoch": 7.756515264333581, "grad_norm": 0.003095974704901838, "kl": 0.28759765625, "learning_rate": 1.214221278334303e-07, "loss": 0.00028729019686579704, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5205, "train_speed(iter/s)": 0.027116 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 206.0, "completions/mean_length": 93.37500190734863, "completions/min_length": 40.75, "epoch": 7.758004467609829, "grad_norm": 2.250748286925439, "kl": 0.288818359375, "learning_rate": 1.212676838582679e-07, "loss": -0.006075484678149223, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5206, "train_speed(iter/s)": 0.027118 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 287.0, "completions/mean_length": 103.21875381469727, "completions/min_length": 36.75, "epoch": 7.759493670886076, "grad_norm": 1.1151587637984286, "kl": 0.26513671875, "learning_rate": 1.2111332461254968e-07, "loss": 0.005438760854303837, "memory(GiB)": 112.53, "reward": 1.8645833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8645833432674408, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5207, "train_speed(iter/s)": 0.027119 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.0, "completions/mean_length": 89.57291984558105, "completions/min_length": 44.0, "epoch": 7.760982874162323, "grad_norm": 0.0030161212966110604, "kl": 0.291748046875, "learning_rate": 1.2095905013080927e-07, "loss": 0.0002920784172601998, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5208, "train_speed(iter/s)": 0.027119 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.0, "completions/mean_length": 89.06250190734863, "completions/min_length": 37.0, "epoch": 7.76247207743857, "grad_norm": 0.04017588600638382, "kl": 0.29638671875, "learning_rate": 1.2080486044756e-07, "loss": 0.00029637571424245834, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5209, "train_speed(iter/s)": 0.027119 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.75, "completions/mean_length": 94.96875190734863, "completions/min_length": 41.5, "epoch": 7.763961280714818, "grad_norm": 1.207481010437636, "kl": 0.283203125, "learning_rate": 1.2065075559729748e-07, "loss": 0.0015179485781118274, "memory(GiB)": 112.53, "reward": 1.6875000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000074505806, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5210, "train_speed(iter/s)": 0.027121 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 262.25, "completions/mean_length": 107.93750381469727, "completions/min_length": 41.5, "epoch": 7.7654504839910645, "grad_norm": 0.002693169208895029, "kl": 0.263427734375, "learning_rate": 1.2049673561449764e-07, "loss": 0.00026377200265415013, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5211, "train_speed(iter/s)": 0.027118 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.0, "completions/mean_length": 91.28125190734863, "completions/min_length": 43.5, "epoch": 7.766939687267312, "grad_norm": 1.647591154000061, "kl": 0.29345703125, "learning_rate": 1.2034280053361744e-07, "loss": -0.02654242143034935, "memory(GiB)": 112.53, "reward": 1.8541666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5212, "train_speed(iter/s)": 0.027117 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.75, "completions/mean_length": 91.35416793823242, "completions/min_length": 43.75, "epoch": 7.768428890543559, "grad_norm": 0.0027734196496829924, "kl": 0.260498046875, "learning_rate": 1.2018895038909549e-07, "loss": 0.00026018329663202167, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5213, "train_speed(iter/s)": 0.027117 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.0, "completions/mean_length": 94.23958778381348, "completions/min_length": 42.25, "epoch": 7.769918093819807, "grad_norm": 0.0029982308139872606, "kl": 0.27392578125, "learning_rate": 1.2003518521535033e-07, "loss": 0.0002744350931607187, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5214, "train_speed(iter/s)": 0.027116 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 246.25, "completions/mean_length": 90.125, "completions/min_length": 45.25, "epoch": 7.7714072970960535, "grad_norm": 0.0028971060862540746, "kl": 0.2890625, "learning_rate": 1.1988150504678268e-07, "loss": 0.0002887507725972682, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5215, "train_speed(iter/s)": 0.027114 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.25, "completions/mean_length": 91.03125381469727, "completions/min_length": 31.25, "epoch": 7.772896500372301, "grad_norm": 0.0024869005571701197, "kl": 0.28955078125, "learning_rate": 1.197279099177731e-07, "loss": 0.0002896463847719133, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5216, "train_speed(iter/s)": 0.027113 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 243.75, "completions/mean_length": 90.66666793823242, "completions/min_length": 39.25, "epoch": 7.774385703648548, "grad_norm": 0.003316420337803665, "kl": 0.28369140625, "learning_rate": 1.1957439986268397e-07, "loss": 0.0002836002968251705, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5217, "train_speed(iter/s)": 0.027113 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 156.5, "completions/mean_length": 85.98958587646484, "completions/min_length": 43.5, "epoch": 7.775874906924795, "grad_norm": 0.0032259650702157225, "kl": 0.2958984375, "learning_rate": 1.194209749158583e-07, "loss": 0.00029558094684034586, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5218, "train_speed(iter/s)": 0.027112 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.0, "completions/mean_length": 95.81250190734863, "completions/min_length": 38.25, "epoch": 7.7773641102010425, "grad_norm": 0.002744816085680799, "kl": 0.2861328125, "learning_rate": 1.1926763511161992e-07, "loss": 0.00028632121393457055, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5219, "train_speed(iter/s)": 0.027111 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.25, "completions/mean_length": 90.60416793823242, "completions/min_length": 37.0, "epoch": 7.77885331347729, "grad_norm": 3.524603980311441, "kl": 0.31103515625, "learning_rate": 1.1911438048427425e-07, "loss": 0.0022823603358119726, "memory(GiB)": 112.53, "reward": 1.7187500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500149011612, "rewards/CineAccuracyORM/std": 0.4456155598163605, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5220, "train_speed(iter/s)": 0.027109 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 225.5, "completions/mean_length": 95.07291984558105, "completions/min_length": 38.0, "epoch": 7.780342516753537, "grad_norm": 0.005444875244522523, "kl": 0.31103515625, "learning_rate": 1.1896121106810658e-07, "loss": 0.00031070332624949515, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5221, "train_speed(iter/s)": 0.027107 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.25, "completions/mean_length": 93.43750190734863, "completions/min_length": 41.75, "epoch": 7.781831720029784, "grad_norm": 0.0028320884243560893, "kl": 0.27880859375, "learning_rate": 1.1880812689738418e-07, "loss": 0.00027850893093273044, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5222, "train_speed(iter/s)": 0.027107 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 204.25, "completions/mean_length": 90.03125381469727, "completions/min_length": 37.25, "epoch": 7.783320923306031, "grad_norm": 0.0028124504555759795, "kl": 0.271240234375, "learning_rate": 1.1865512800635469e-07, "loss": 0.0002707619278226048, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5223, "train_speed(iter/s)": 0.027109 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 233.5, "completions/mean_length": 89.32291984558105, "completions/min_length": 40.25, "epoch": 7.784810126582278, "grad_norm": 0.003069712779750675, "kl": 0.2783203125, "learning_rate": 1.1850221442924668e-07, "loss": 0.00027824885910376906, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5224, "train_speed(iter/s)": 0.027109 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.5, "completions/mean_length": 93.22916984558105, "completions/min_length": 44.75, "epoch": 7.786299329858526, "grad_norm": 0.003238832559942142, "kl": 0.26318359375, "learning_rate": 1.1834938620027019e-07, "loss": 0.0002632614632602781, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5225, "train_speed(iter/s)": 0.027108 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 243.0, "completions/mean_length": 100.41667175292969, "completions/min_length": 46.25, "epoch": 7.787788533134773, "grad_norm": 0.0028336526314197944, "kl": 0.27978515625, "learning_rate": 1.1819664335361506e-07, "loss": 0.0002794988686218858, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5226, "train_speed(iter/s)": 0.027107 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 237.75, "completions/mean_length": 104.75000190734863, "completions/min_length": 37.0, "epoch": 7.78927773641102, "grad_norm": 0.9119582540098427, "kl": 0.25634765625, "learning_rate": 1.1804398592345322e-07, "loss": -0.011160612106323242, "memory(GiB)": 112.53, "reward": 1.8958333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5227, "train_speed(iter/s)": 0.027109 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.75, "completions/mean_length": 84.16666984558105, "completions/min_length": 44.75, "epoch": 7.790766939687267, "grad_norm": 0.0029224971482972187, "kl": 0.310546875, "learning_rate": 1.1789141394393681e-07, "loss": 0.00031062259222380817, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5228, "train_speed(iter/s)": 0.027107 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 199.75, "completions/mean_length": 86.97916984558105, "completions/min_length": 40.25, "epoch": 7.792256142963515, "grad_norm": 0.0030398196316112625, "kl": 0.29345703125, "learning_rate": 1.17738927449199e-07, "loss": 0.0002932066563516855, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5229, "train_speed(iter/s)": 0.027109 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 208.0, "completions/mean_length": 90.71875190734863, "completions/min_length": 43.25, "epoch": 7.793745346239762, "grad_norm": 0.003939000128518543, "kl": 0.29931640625, "learning_rate": 1.1758652647335382e-07, "loss": 0.0002993523667100817, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5230, "train_speed(iter/s)": 0.027109 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.0, "completions/mean_length": 89.89583396911621, "completions/min_length": 41.0, "epoch": 7.7952345495160085, "grad_norm": 0.003093529962505871, "kl": 0.30126953125, "learning_rate": 1.1743421105049611e-07, "loss": 0.00030122819589450955, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5231, "train_speed(iter/s)": 0.027111 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 220.0, "completions/mean_length": 83.04166984558105, "completions/min_length": 37.0, "epoch": 7.796723752792256, "grad_norm": 0.003297330448925178, "kl": 0.2841796875, "learning_rate": 1.1728198121470196e-07, "loss": 0.0002841795503627509, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5232, "train_speed(iter/s)": 0.027112 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.75, "completions/mean_length": 100.94792175292969, "completions/min_length": 36.5, "epoch": 7.798212956068504, "grad_norm": 0.0028992842913446175, "kl": 0.256103515625, "learning_rate": 1.1712983700002776e-07, "loss": 0.00025553908199071884, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5233, "train_speed(iter/s)": 0.02711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 282.0, "completions/mean_length": 95.06250381469727, "completions/min_length": 41.25, "epoch": 7.799702159344751, "grad_norm": 1.5931475330477922, "kl": 0.29248046875, "learning_rate": 1.1697777844051104e-07, "loss": -0.014437883161008358, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5234, "train_speed(iter/s)": 0.027107 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.5, "completions/mean_length": 100.71875381469727, "completions/min_length": 39.75, "epoch": 7.801191362620997, "grad_norm": 1.212715726665645, "kl": 0.283203125, "learning_rate": 1.1682580557017013e-07, "loss": 0.0030116522684693336, "memory(GiB)": 112.53, "reward": 1.7812500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7812500074505806, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5235, "train_speed(iter/s)": 0.027109 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.5, "completions/mean_length": 91.37500190734863, "completions/min_length": 40.5, "epoch": 7.802680565897245, "grad_norm": 0.003131826719379659, "kl": 0.269775390625, "learning_rate": 1.1667391842300406e-07, "loss": 0.00026978953974321485, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5236, "train_speed(iter/s)": 0.027109 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 232.0, "completions/mean_length": 90.62500381469727, "completions/min_length": 43.25, "epoch": 7.804169769173492, "grad_norm": 1.925021556249628, "kl": 0.28662109375, "learning_rate": 1.1652211703299309e-07, "loss": -0.007337482180446386, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.822916679084301, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5237, "train_speed(iter/s)": 0.027111 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.0, "completions/mean_length": 94.88541793823242, "completions/min_length": 42.75, "epoch": 7.80565897244974, "grad_norm": 0.003428211258026816, "kl": 0.265869140625, "learning_rate": 1.1637040143409782e-07, "loss": 0.00026601547142490745, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5238, "train_speed(iter/s)": 0.027113 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 234.25, "completions/mean_length": 100.51041984558105, "completions/min_length": 44.25, "epoch": 7.807148175725986, "grad_norm": 0.0027943113757916463, "kl": 0.26220703125, "learning_rate": 1.162187716602599e-07, "loss": 0.0002621148887556046, "memory(GiB)": 112.53, "reward": 1.5000000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5239, "train_speed(iter/s)": 0.027113 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.25, "completions/mean_length": 90.21875190734863, "completions/min_length": 37.0, "epoch": 7.808637379002234, "grad_norm": 0.0032538451838003235, "kl": 0.3125, "learning_rate": 1.1606722774540145e-07, "loss": 0.0003128517128061503, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5240, "train_speed(iter/s)": 0.027112 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.0, "completions/mean_length": 94.62500381469727, "completions/min_length": 40.25, "epoch": 7.810126582278481, "grad_norm": 0.0071674523442492736, "kl": 0.2861328125, "learning_rate": 1.1591576972342626e-07, "loss": 0.00028609446599148214, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5241, "train_speed(iter/s)": 0.027113 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 241.0, "completions/mean_length": 96.70833396911621, "completions/min_length": 40.5, "epoch": 7.811615785554729, "grad_norm": 0.0033041942104805894, "kl": 0.29833984375, "learning_rate": 1.1576439762821766e-07, "loss": 0.000298295053653419, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5242, "train_speed(iter/s)": 0.027113 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.25, "completions/mean_length": 90.77083587646484, "completions/min_length": 41.25, "epoch": 7.813104988830975, "grad_norm": 0.0031062305545551845, "kl": 0.280029296875, "learning_rate": 1.1561311149364073e-07, "loss": 0.00027949566720053554, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5243, "train_speed(iter/s)": 0.027112 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 267.25, "completions/mean_length": 102.79166984558105, "completions/min_length": 35.0, "epoch": 7.814594192107222, "grad_norm": 0.0025907258037157744, "kl": 0.267578125, "learning_rate": 1.1546191135354099e-07, "loss": 0.0002676534350030124, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5244, "train_speed(iter/s)": 0.027114 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.25, "completions/mean_length": 96.14583587646484, "completions/min_length": 34.5, "epoch": 7.81608339538347, "grad_norm": 0.0037813535078087485, "kl": 0.2705078125, "learning_rate": 1.1531079724174447e-07, "loss": 0.00027048058109357953, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5245, "train_speed(iter/s)": 0.027112 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 248.75, "completions/mean_length": 91.20833396911621, "completions/min_length": 42.25, "epoch": 7.8175725986597175, "grad_norm": 1.0648481016242652, "kl": 0.273193359375, "learning_rate": 1.1515976919205866e-07, "loss": 0.010213752277195454, "memory(GiB)": 112.53, "reward": 1.9270833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9270833432674408, "rewards/CineAccuracyORM/std": 0.11607640981674194, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5246, "train_speed(iter/s)": 0.02711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 235.75, "completions/mean_length": 87.20833587646484, "completions/min_length": 39.25, "epoch": 7.819061801935964, "grad_norm": 0.0029342673721213203, "kl": 0.29248046875, "learning_rate": 1.1500882723827082e-07, "loss": 0.0002924884611275047, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5247, "train_speed(iter/s)": 0.027112 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.5, "completions/mean_length": 98.81250381469727, "completions/min_length": 36.75, "epoch": 7.820551005212211, "grad_norm": 0.002911017152498315, "kl": 0.2822265625, "learning_rate": 1.1485797141414977e-07, "loss": 0.0002822238311637193, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5248, "train_speed(iter/s)": 0.027112 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.0, "completions/mean_length": 102.47916793823242, "completions/min_length": 43.75, "epoch": 7.822040208488459, "grad_norm": 0.002798040032313957, "kl": 0.25146484375, "learning_rate": 1.1470720175344472e-07, "loss": 0.0002512465580366552, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5249, "train_speed(iter/s)": 0.027111 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 202.5, "completions/mean_length": 89.16667175292969, "completions/min_length": 43.0, "epoch": 7.823529411764706, "grad_norm": 0.003309462311228072, "kl": 0.26953125, "learning_rate": 1.145565182898855e-07, "loss": 0.0002698110183700919, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5250, "train_speed(iter/s)": 0.027112 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 327.75, "completions/mean_length": 103.54167175292969, "completions/min_length": 27.5, "epoch": 7.825018615040953, "grad_norm": 0.0025888618424710196, "kl": 0.2470703125, "learning_rate": 1.144059210571831e-07, "loss": 0.00024691392900422215, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5251, "train_speed(iter/s)": 0.027111 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 186.75, "completions/mean_length": 95.84375190734863, "completions/min_length": 46.5, "epoch": 7.8265078183172, "grad_norm": 0.0029629613024630833, "kl": 0.267333984375, "learning_rate": 1.142554100890285e-07, "loss": 0.0002673405397217721, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5252, "train_speed(iter/s)": 0.02711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.0, "completions/mean_length": 80.68750190734863, "completions/min_length": 37.0, "epoch": 7.827997021593448, "grad_norm": 1.243666348429367, "kl": 0.31689453125, "learning_rate": 1.1410498541909425e-07, "loss": 0.006775502115488052, "memory(GiB)": 112.53, "reward": 1.7604166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5253, "train_speed(iter/s)": 0.027112 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 232.75, "completions/mean_length": 106.44791793823242, "completions/min_length": 45.25, "epoch": 7.829486224869695, "grad_norm": 0.0029889001841307645, "kl": 0.238525390625, "learning_rate": 1.1395464708103264e-07, "loss": 0.00023864749528001994, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5254, "train_speed(iter/s)": 0.027112 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 274.75, "completions/mean_length": 107.30208778381348, "completions/min_length": 34.75, "epoch": 7.830975428145942, "grad_norm": 0.004141067463537521, "kl": 0.2470703125, "learning_rate": 1.1380439510847756e-07, "loss": 0.00024703811504878104, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5255, "train_speed(iter/s)": 0.027113 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.5, "completions/mean_length": 102.82291793823242, "completions/min_length": 34.25, "epoch": 7.832464631422189, "grad_norm": 0.002696836497604405, "kl": 0.257080078125, "learning_rate": 1.13654229535043e-07, "loss": 0.00025710766203701496, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5256, "train_speed(iter/s)": 0.027115 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.75, "completions/mean_length": 84.57291793823242, "completions/min_length": 43.0, "epoch": 7.833953834698436, "grad_norm": 0.00439374175348037, "kl": 0.314453125, "learning_rate": 1.1350415039432365e-07, "loss": 0.00031448545632883906, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5257, "train_speed(iter/s)": 0.027117 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 241.5, "completions/mean_length": 106.05208396911621, "completions/min_length": 40.0, "epoch": 7.8354430379746836, "grad_norm": 0.002670672598391674, "kl": 0.261962890625, "learning_rate": 1.1335415771989538e-07, "loss": 0.0002616860147099942, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5258, "train_speed(iter/s)": 0.027115 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.0, "completions/mean_length": 88.77083587646484, "completions/min_length": 35.25, "epoch": 7.836932241250931, "grad_norm": 0.01356584160282438, "kl": 0.31103515625, "learning_rate": 1.1320425154531388e-07, "loss": 0.0003102751215919852, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5259, "train_speed(iter/s)": 0.027116 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 258.75, "completions/mean_length": 98.76041984558105, "completions/min_length": 43.25, "epoch": 7.838421444527178, "grad_norm": 0.0032681418061520566, "kl": 0.267333984375, "learning_rate": 1.1305443190411623e-07, "loss": 0.0002670586109161377, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5260, "train_speed(iter/s)": 0.027115 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.5, "completions/mean_length": 88.16666984558105, "completions/min_length": 34.0, "epoch": 7.839910647803425, "grad_norm": 0.002904474441878718, "kl": 0.3056640625, "learning_rate": 1.1290469882981984e-07, "loss": 0.00030567828798666596, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5261, "train_speed(iter/s)": 0.027114 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 266.75, "completions/mean_length": 100.85416793823242, "completions/min_length": 40.75, "epoch": 7.8413998510796725, "grad_norm": 0.002985115092378623, "kl": 0.265380859375, "learning_rate": 1.1275505235592253e-07, "loss": 0.0002654914278537035, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5262, "train_speed(iter/s)": 0.027115 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 244.75, "completions/mean_length": 98.67708587646484, "completions/min_length": 40.75, "epoch": 7.842889054355919, "grad_norm": 0.003581396052770761, "kl": 0.262939453125, "learning_rate": 1.1260549251590351e-07, "loss": 0.00026295968564227223, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5263, "train_speed(iter/s)": 0.027115 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 201.25, "completions/mean_length": 84.00000381469727, "completions/min_length": 42.25, "epoch": 7.844378257632167, "grad_norm": 0.002749781948275779, "kl": 0.3193359375, "learning_rate": 1.1245601934322147e-07, "loss": 0.0003196038887836039, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5264, "train_speed(iter/s)": 0.027114 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.0, "completions/mean_length": 89.48958396911621, "completions/min_length": 40.75, "epoch": 7.845867460908414, "grad_norm": 0.002936409967698357, "kl": 0.267333984375, "learning_rate": 1.123066328713168e-07, "loss": 0.00026711702230386436, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5265, "train_speed(iter/s)": 0.027116 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 321.75, "completions/mean_length": 111.625, "completions/min_length": 48.25, "epoch": 7.8473566641846615, "grad_norm": 0.002757362563616147, "kl": 0.253173828125, "learning_rate": 1.121573331336098e-07, "loss": 0.0002536388929001987, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5266, "train_speed(iter/s)": 0.027117 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 184.0, "completions/mean_length": 90.27083396911621, "completions/min_length": 37.0, "epoch": 7.848845867460908, "grad_norm": 1.8834064186202393, "kl": 0.28369140625, "learning_rate": 1.120081201635017e-07, "loss": 0.001977204345166683, "memory(GiB)": 112.53, "reward": 1.875, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.875, "rewards/CineAccuracyORM/std": 0.1276884824037552, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5267, "train_speed(iter/s)": 0.027116 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 232.5, "completions/mean_length": 90.02083396911621, "completions/min_length": 36.25, "epoch": 7.850335070737156, "grad_norm": 0.0028704996591520362, "kl": 0.2724609375, "learning_rate": 1.118589939943741e-07, "loss": 0.00027266511460766196, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5268, "train_speed(iter/s)": 0.027118 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.5, "completions/mean_length": 107.65625190734863, "completions/min_length": 37.0, "epoch": 7.851824274013403, "grad_norm": 0.0026261845152162726, "kl": 0.24560546875, "learning_rate": 1.117099546595892e-07, "loss": 0.0002449930179864168, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5269, "train_speed(iter/s)": 0.027119 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.5, "completions/mean_length": 88.03125381469727, "completions/min_length": 37.5, "epoch": 7.85331347728965, "grad_norm": 0.002728150644723427, "kl": 0.29833984375, "learning_rate": 1.115610021924902e-07, "loss": 0.00029783177888020873, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5270, "train_speed(iter/s)": 0.027119 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.75, "completions/mean_length": 102.17708587646484, "completions/min_length": 39.75, "epoch": 7.854802680565897, "grad_norm": 0.002656274588716418, "kl": 0.2587890625, "learning_rate": 1.1141213662640032e-07, "loss": 0.00025908020325005054, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5271, "train_speed(iter/s)": 0.027118 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 257.5, "completions/mean_length": 98.90625381469727, "completions/min_length": 38.0, "epoch": 7.856291883842145, "grad_norm": 1.3461757779651085, "kl": 0.287353515625, "learning_rate": 1.1126335799462355e-07, "loss": 0.006700645200908184, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5272, "train_speed(iter/s)": 0.027119 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.75, "completions/mean_length": 100.26041793823242, "completions/min_length": 39.5, "epoch": 7.857781087118392, "grad_norm": 0.003156573728553424, "kl": 0.26904296875, "learning_rate": 1.1111466633044447e-07, "loss": 0.0002686984953470528, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5273, "train_speed(iter/s)": 0.027119 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 247.75, "completions/mean_length": 108.07291984558105, "completions/min_length": 47.0, "epoch": 7.8592702903946385, "grad_norm": 0.4054476954605806, "kl": 0.373046875, "learning_rate": 1.1096606166712797e-07, "loss": 0.0003733936173375696, "memory(GiB)": 112.53, "reward": 1.5000000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5274, "train_speed(iter/s)": 0.027117 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 249.75, "completions/mean_length": 105.71875190734863, "completions/min_length": 43.0, "epoch": 7.860759493670886, "grad_norm": 0.0028451168680105558, "kl": 0.258056640625, "learning_rate": 1.1081754403791999e-07, "loss": 0.00025822294992394745, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5275, "train_speed(iter/s)": 0.027117 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 240.0, "completions/mean_length": 94.18750190734863, "completions/min_length": 39.0, "epoch": 7.862248696947133, "grad_norm": 0.002965903498993699, "kl": 0.296875, "learning_rate": 1.1066911347604652e-07, "loss": 0.0002969347406178713, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5276, "train_speed(iter/s)": 0.027114 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 247.75, "completions/mean_length": 91.23958587646484, "completions/min_length": 47.5, "epoch": 7.863737900223381, "grad_norm": 0.0032591687403089693, "kl": 0.28662109375, "learning_rate": 1.1052077001471422e-07, "loss": 0.0002865197602659464, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5277, "train_speed(iter/s)": 0.027113 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 235.25, "completions/mean_length": 93.64583587646484, "completions/min_length": 37.75, "epoch": 7.8652271034996275, "grad_norm": 0.9489916751847327, "kl": 0.265625, "learning_rate": 1.1037251368711032e-07, "loss": 0.0035377750173211098, "memory(GiB)": 112.53, "reward": 1.9895833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9895833432674408, "rewards/CineAccuracyORM/std": 0.05103103443980217, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5278, "train_speed(iter/s)": 0.027114 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 195.75, "completions/mean_length": 87.45833587646484, "completions/min_length": 43.75, "epoch": 7.866716306775875, "grad_norm": 0.0028179049814574552, "kl": 0.30126953125, "learning_rate": 1.1022434452640251e-07, "loss": 0.00030029387562535703, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5279, "train_speed(iter/s)": 0.027115 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 235.0, "completions/mean_length": 99.43750190734863, "completions/min_length": 48.0, "epoch": 7.868205510052122, "grad_norm": 0.9983864227123215, "kl": 0.254150390625, "learning_rate": 1.1007626256573877e-07, "loss": -0.005463731940835714, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5280, "train_speed(iter/s)": 0.027115 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 254.0, "completions/mean_length": 107.82292175292969, "completions/min_length": 48.25, "epoch": 7.86969471332837, "grad_norm": 0.0026929790849268883, "kl": 0.251708984375, "learning_rate": 1.099282678382481e-07, "loss": 0.0002520806447137147, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5281, "train_speed(iter/s)": 0.027113 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 232.25, "completions/mean_length": 92.09375381469727, "completions/min_length": 42.5, "epoch": 7.8711839166046165, "grad_norm": 0.0027405986011307957, "kl": 0.27587890625, "learning_rate": 1.0978036037703953e-07, "loss": 0.00027632349519990385, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5282, "train_speed(iter/s)": 0.027114 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 238.25, "completions/mean_length": 97.38541793823242, "completions/min_length": 45.5, "epoch": 7.872673119880863, "grad_norm": 1.0422720779986807, "kl": 0.291015625, "learning_rate": 1.0963254021520268e-07, "loss": -0.008327137678861618, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833507180214, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5283, "train_speed(iter/s)": 0.027114 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 202.25, "completions/mean_length": 86.48958778381348, "completions/min_length": 37.25, "epoch": 7.874162323157111, "grad_norm": 0.002921172048491147, "kl": 0.31201171875, "learning_rate": 1.0948480738580774e-07, "loss": 0.00031176305492408574, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5284, "train_speed(iter/s)": 0.027113 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 232.0, "completions/mean_length": 95.43750381469727, "completions/min_length": 37.75, "epoch": 7.875651526433359, "grad_norm": 1.0578843111181226, "kl": 0.27783203125, "learning_rate": 1.09337161921905e-07, "loss": -0.004358252510428429, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5285, "train_speed(iter/s)": 0.027114 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 289.25, "completions/mean_length": 106.42708778381348, "completions/min_length": 46.25, "epoch": 7.877140729709605, "grad_norm": 0.002585443653620602, "kl": 0.24951171875, "learning_rate": 1.0918960385652593e-07, "loss": 0.00024956438574008644, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5286, "train_speed(iter/s)": 0.027112 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 244.5, "completions/mean_length": 95.93750190734863, "completions/min_length": 42.0, "epoch": 7.878629932985852, "grad_norm": 0.0028529575388504744, "kl": 0.291015625, "learning_rate": 1.090421332226818e-07, "loss": 0.00029124802676960826, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5287, "train_speed(iter/s)": 0.027112 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.25, "completions/mean_length": 97.83333587646484, "completions/min_length": 46.75, "epoch": 7.8801191362621, "grad_norm": 0.0029945869378990566, "kl": 0.287109375, "learning_rate": 1.0889475005336446e-07, "loss": 0.0002876116777770221, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5288, "train_speed(iter/s)": 0.027113 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.75, "completions/mean_length": 103.39583587646484, "completions/min_length": 47.0, "epoch": 7.881608339538347, "grad_norm": 0.0028108682538131946, "kl": 0.285888671875, "learning_rate": 1.0874745438154637e-07, "loss": 0.0002853222540579736, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5289, "train_speed(iter/s)": 0.027114 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.5, "completions/mean_length": 96.71875190734863, "completions/min_length": 40.0, "epoch": 7.883097542814594, "grad_norm": 0.0026327014612202854, "kl": 0.25927734375, "learning_rate": 1.0860024624018011e-07, "loss": 0.00025894847931340337, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5290, "train_speed(iter/s)": 0.027112 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.5, "completions/mean_length": 103.81250190734863, "completions/min_length": 48.0, "epoch": 7.884586746090841, "grad_norm": 0.002660598810270121, "kl": 0.27294921875, "learning_rate": 1.0845312566219922e-07, "loss": 0.0002726808888837695, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5291, "train_speed(iter/s)": 0.02711 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 220.0, "completions/mean_length": 104.87500190734863, "completions/min_length": 40.0, "epoch": 7.886075949367089, "grad_norm": 0.0028793635297362495, "kl": 0.260986328125, "learning_rate": 1.0830609268051688e-07, "loss": 0.00026079799863509834, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5292, "train_speed(iter/s)": 0.027109 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 250.5, "completions/mean_length": 113.59375381469727, "completions/min_length": 45.75, "epoch": 7.887565152643336, "grad_norm": 0.002615114085278425, "kl": 0.259521484375, "learning_rate": 1.0815914732802744e-07, "loss": 0.00025881428155116737, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5293, "train_speed(iter/s)": 0.027106 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 208.75, "completions/mean_length": 92.45833396911621, "completions/min_length": 35.0, "epoch": 7.889054355919583, "grad_norm": 0.0029326306895296203, "kl": 0.2783203125, "learning_rate": 1.0801228963760517e-07, "loss": 0.0002775583998300135, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5294, "train_speed(iter/s)": 0.027105 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.75, "completions/mean_length": 102.10416984558105, "completions/min_length": 44.25, "epoch": 7.89054355919583, "grad_norm": 0.0030210244387049306, "kl": 0.263671875, "learning_rate": 1.0786551964210477e-07, "loss": 0.0002635732525959611, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5295, "train_speed(iter/s)": 0.027107 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 199.0, "completions/mean_length": 99.93750381469727, "completions/min_length": 45.75, "epoch": 7.892032762472077, "grad_norm": 0.0025255319508714507, "kl": 0.27587890625, "learning_rate": 1.0771883737436171e-07, "loss": 0.00027598917949944735, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5296, "train_speed(iter/s)": 0.027107 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 220.75, "completions/mean_length": 106.25000190734863, "completions/min_length": 44.5, "epoch": 7.893521965748325, "grad_norm": 0.0027605148422773567, "kl": 0.2421875, "learning_rate": 1.0757224286719107e-07, "loss": 0.00024174094141926616, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5297, "train_speed(iter/s)": 0.027106 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.75, "completions/mean_length": 99.94791984558105, "completions/min_length": 45.25, "epoch": 7.895011169024572, "grad_norm": 2.520981438655936, "kl": 0.267578125, "learning_rate": 1.0742573615338918e-07, "loss": -0.0065836599096655846, "memory(GiB)": 112.53, "reward": 1.7187500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500149011612, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5298, "train_speed(iter/s)": 0.027106 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.0, "completions/mean_length": 87.86458587646484, "completions/min_length": 39.5, "epoch": 7.896500372300819, "grad_norm": 0.0029204734788075767, "kl": 0.30419921875, "learning_rate": 1.0727931726573214e-07, "loss": 0.0003039363364223391, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5299, "train_speed(iter/s)": 0.027105 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.0, "completions/mean_length": 94.34375190734863, "completions/min_length": 40.5, "epoch": 7.897989575577066, "grad_norm": 0.003436097013605921, "kl": 0.263671875, "learning_rate": 1.0713298623697652e-07, "loss": 0.00026350608095526695, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5300, "train_speed(iter/s)": 0.027104 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 240.0, "completions/mean_length": 100.39583587646484, "completions/min_length": 38.75, "epoch": 7.899478778853314, "grad_norm": 0.010705409367552203, "kl": 0.27099609375, "learning_rate": 1.0698674309985944e-07, "loss": 0.0002703427744563669, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5301, "train_speed(iter/s)": 0.027103 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 284.25, "completions/mean_length": 91.70833396911621, "completions/min_length": 32.5, "epoch": 7.90096798212956, "grad_norm": 1.035989735906272, "kl": 0.284423828125, "learning_rate": 1.0684058788709788e-07, "loss": 0.004539195913821459, "memory(GiB)": 112.53, "reward": 1.6770834028720856, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.677083358168602, "rewards/CineAccuracyORM/std": 0.47723397612571716, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5302, "train_speed(iter/s)": 0.027102 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 243.75, "completions/mean_length": 98.92708778381348, "completions/min_length": 48.0, "epoch": 7.902457185405808, "grad_norm": 0.0025641511363999735, "kl": 0.2568359375, "learning_rate": 1.066945206313899e-07, "loss": 0.00025679831742309034, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5303, "train_speed(iter/s)": 0.027104 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 232.0, "completions/mean_length": 97.50000381469727, "completions/min_length": 41.0, "epoch": 7.903946388682055, "grad_norm": 2.1910516919106247, "kl": 0.27001953125, "learning_rate": 1.0654854136541324e-07, "loss": 0.024772269651293755, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5304, "train_speed(iter/s)": 0.027103 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 268.0, "completions/mean_length": 117.48958396911621, "completions/min_length": 41.75, "epoch": 7.905435591958303, "grad_norm": 0.024114986392730794, "kl": 0.2451171875, "learning_rate": 1.0640265012182615e-07, "loss": 0.00024508286151103675, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5305, "train_speed(iter/s)": 0.027103 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 280.75, "completions/mean_length": 109.21875190734863, "completions/min_length": 33.0, "epoch": 7.906924795234549, "grad_norm": 0.8215499864206103, "kl": 0.246826171875, "learning_rate": 1.0625684693326725e-07, "loss": 0.014876525849103928, "memory(GiB)": 112.53, "reward": 1.5937500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500074505806, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5306, "train_speed(iter/s)": 0.027101 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.25, "completions/mean_length": 101.32291793823242, "completions/min_length": 39.25, "epoch": 7.908413998510797, "grad_norm": 0.0026737508673143878, "kl": 0.27880859375, "learning_rate": 1.0611113183235526e-07, "loss": 0.00027920579304918647, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5307, "train_speed(iter/s)": 0.027099 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 274.25, "completions/mean_length": 99.63541984558105, "completions/min_length": 34.5, "epoch": 7.909903201787044, "grad_norm": 0.002624057899473314, "kl": 0.258544921875, "learning_rate": 1.0596550485168971e-07, "loss": 0.0002586183254607022, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5308, "train_speed(iter/s)": 0.027099 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 275.25, "completions/mean_length": 103.08333778381348, "completions/min_length": 40.25, "epoch": 7.911392405063291, "grad_norm": 0.0027149616569431363, "kl": 0.267578125, "learning_rate": 1.0581996602384973e-07, "loss": 0.0002674809074960649, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5309, "train_speed(iter/s)": 0.027099 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.5, "completions/mean_length": 96.26041984558105, "completions/min_length": 43.75, "epoch": 7.912881608339538, "grad_norm": 1.811971712134569, "kl": 0.255615234375, "learning_rate": 1.0567451538139527e-07, "loss": 0.013218702748417854, "memory(GiB)": 112.53, "reward": 1.9583333432674408, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.9583333432674408, "rewards/CineAccuracyORM/std": 0.09517337381839752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5310, "train_speed(iter/s)": 0.027099 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 196.75, "completions/mean_length": 96.05208587646484, "completions/min_length": 40.25, "epoch": 7.914370811615786, "grad_norm": 0.002599000473117581, "kl": 0.266845703125, "learning_rate": 1.055291529568662e-07, "loss": 0.000266663555521518, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5311, "train_speed(iter/s)": 0.0271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 185.75, "completions/mean_length": 89.62500190734863, "completions/min_length": 35.0, "epoch": 7.915860014892033, "grad_norm": 1.124772395793773, "kl": 0.2734375, "learning_rate": 1.0538387878278282e-07, "loss": -0.0013577122008427978, "memory(GiB)": 112.53, "reward": 1.8125000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5312, "train_speed(iter/s)": 0.0271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.0, "completions/mean_length": 88.81250190734863, "completions/min_length": 39.75, "epoch": 7.91734921816828, "grad_norm": 0.00291383314415219, "kl": 0.2939453125, "learning_rate": 1.0523869289164555e-07, "loss": 0.00029379845364019275, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5313, "train_speed(iter/s)": 0.027101 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 286.25, "completions/mean_length": 101.04166984558105, "completions/min_length": 43.75, "epoch": 7.918838421444527, "grad_norm": 0.0028170375592941072, "kl": 0.268310546875, "learning_rate": 1.0509359531593536e-07, "loss": 0.0002684943610802293, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5314, "train_speed(iter/s)": 0.0271 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 287.5, "completions/mean_length": 103.77083778381348, "completions/min_length": 44.0, "epoch": 7.920327624720774, "grad_norm": 0.01579702974277538, "kl": 0.26806640625, "learning_rate": 1.0494858608811324e-07, "loss": 0.00026814083685167134, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5315, "train_speed(iter/s)": 0.027099 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 233.5, "completions/mean_length": 101.37500381469727, "completions/min_length": 38.25, "epoch": 7.921816827997022, "grad_norm": 1.7337356201409597, "kl": 0.255859375, "learning_rate": 1.048036652406204e-07, "loss": -0.0033562970347702503, "memory(GiB)": 112.53, "reward": 1.6354166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6354166716337204, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5316, "train_speed(iter/s)": 0.027098 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 304.5, "completions/mean_length": 110.53125190734863, "completions/min_length": 42.75, "epoch": 7.923306031273269, "grad_norm": 0.0025542110968359046, "kl": 0.26171875, "learning_rate": 1.0465883280587834e-07, "loss": 0.00026184620219282806, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5317, "train_speed(iter/s)": 0.027098 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.0, "completions/mean_length": 107.18750190734863, "completions/min_length": 51.0, "epoch": 7.924795234549516, "grad_norm": 1.896059022026488, "kl": 0.244140625, "learning_rate": 1.0451408881628854e-07, "loss": 0.010010560043156147, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.07715167850255966, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4772557094693184, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5318, "train_speed(iter/s)": 0.027099 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 285.25, "completions/mean_length": 95.66666984558105, "completions/min_length": 39.75, "epoch": 7.926284437825763, "grad_norm": 0.002655773779292426, "kl": 0.26513671875, "learning_rate": 1.043694333042333e-07, "loss": 0.0002650687238201499, "memory(GiB)": 112.53, "reward": 1.5000000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5319, "train_speed(iter/s)": 0.027098 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 275.5, "completions/mean_length": 109.41666793823242, "completions/min_length": 36.5, "epoch": 7.927773641102011, "grad_norm": 0.0025366990234117954, "kl": 0.2578125, "learning_rate": 1.0422486630207455e-07, "loss": 0.00025845447089523077, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5320, "train_speed(iter/s)": 0.027098 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 256.75, "completions/mean_length": 100.59375190734863, "completions/min_length": 38.5, "epoch": 7.929262844378258, "grad_norm": 0.002795066791595096, "kl": 0.286865234375, "learning_rate": 1.0408038784215461e-07, "loss": 0.00028714624932035804, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5321, "train_speed(iter/s)": 0.027098 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 236.25, "completions/mean_length": 92.87500190734863, "completions/min_length": 37.0, "epoch": 7.930752047654504, "grad_norm": 0.0029535309996405208, "kl": 0.29150390625, "learning_rate": 1.0393599795679598e-07, "loss": 0.0002914979122579098, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5322, "train_speed(iter/s)": 0.027098 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 231.25, "completions/mean_length": 104.79166793823242, "completions/min_length": 52.0, "epoch": 7.932241250930752, "grad_norm": 0.0024833519805971026, "kl": 0.25634765625, "learning_rate": 1.0379169667830123e-07, "loss": 0.00025632185861468315, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5323, "train_speed(iter/s)": 0.027097 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 259.5, "completions/mean_length": 104.13541793823242, "completions/min_length": 45.75, "epoch": 7.933730454207, "grad_norm": 0.0027480505178578687, "kl": 0.27001953125, "learning_rate": 1.0364748403895368e-07, "loss": 0.00026959829847328365, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5324, "train_speed(iter/s)": 0.027094 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.5, "completions/mean_length": 92.22917175292969, "completions/min_length": 38.5, "epoch": 7.9352196574832465, "grad_norm": 0.0026432710191237265, "kl": 0.283935546875, "learning_rate": 1.0350336007101579e-07, "loss": 0.00028404081240296364, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5325, "train_speed(iter/s)": 0.027093 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.75, "completions/mean_length": 96.09375190734863, "completions/min_length": 40.75, "epoch": 7.936708860759493, "grad_norm": 0.003009376923584196, "kl": 0.27783203125, "learning_rate": 1.033593248067312e-07, "loss": 0.0002778077032417059, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5326, "train_speed(iter/s)": 0.02709 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.5, "completions/mean_length": 94.33333587646484, "completions/min_length": 45.75, "epoch": 7.938198064035741, "grad_norm": 0.002678205945509111, "kl": 0.28271484375, "learning_rate": 1.0321537827832311e-07, "loss": 0.0002821198431774974, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5327, "train_speed(iter/s)": 0.027089 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 232.5, "completions/mean_length": 97.34375381469727, "completions/min_length": 38.5, "epoch": 7.939687267311988, "grad_norm": 1.1122674377458546, "kl": 0.294921875, "learning_rate": 1.0307152051799484e-07, "loss": 0.02293885499238968, "memory(GiB)": 112.53, "reward": 1.6145833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6145833432674408, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5328, "train_speed(iter/s)": 0.027088 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 254.75, "completions/mean_length": 106.39583587646484, "completions/min_length": 45.0, "epoch": 7.9411764705882355, "grad_norm": 0.00258993911590655, "kl": 0.265625, "learning_rate": 1.0292775155793055e-07, "loss": 0.00026534043718129396, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5329, "train_speed(iter/s)": 0.027086 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.0, "completions/mean_length": 100.37500190734863, "completions/min_length": 48.75, "epoch": 7.942665673864482, "grad_norm": 1.1932937247227202, "kl": 0.263916015625, "learning_rate": 1.0278407143029343e-07, "loss": 0.009254679083824158, "memory(GiB)": 112.53, "reward": 1.9479166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.9479166716337204, "rewards/CineAccuracyORM/std": 0.1037127822637558, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5330, "train_speed(iter/s)": 0.027086 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.75, "completions/mean_length": 99.69791793823242, "completions/min_length": 53.0, "epoch": 7.94415487714073, "grad_norm": 0.0030273341336152044, "kl": 0.27880859375, "learning_rate": 1.0264048016722781e-07, "loss": 0.0002786885015666485, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5331, "train_speed(iter/s)": 0.027087 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 299.0, "completions/mean_length": 107.47916793823242, "completions/min_length": 43.5, "epoch": 7.945644080416977, "grad_norm": 0.002624581914463192, "kl": 0.247802734375, "learning_rate": 1.0249697780085759e-07, "loss": 0.00024780904641374946, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5332, "train_speed(iter/s)": 0.027086 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 254.0, "completions/mean_length": 98.43750190734863, "completions/min_length": 42.75, "epoch": 7.9471332836932245, "grad_norm": 0.0028543006252593406, "kl": 0.26220703125, "learning_rate": 1.0235356436328674e-07, "loss": 0.000262761750491336, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5333, "train_speed(iter/s)": 0.027087 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.75, "completions/mean_length": 97.97916793823242, "completions/min_length": 38.5, "epoch": 7.948622486969471, "grad_norm": 0.0025601532239522247, "kl": 0.2607421875, "learning_rate": 1.0221023988659987e-07, "loss": 0.0002604220062494278, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5334, "train_speed(iter/s)": 0.027086 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.75, "completions/mean_length": 95.63541984558105, "completions/min_length": 35.25, "epoch": 7.950111690245718, "grad_norm": 0.0027741227796561994, "kl": 0.28076171875, "learning_rate": 1.0206700440286092e-07, "loss": 0.0002813197788782418, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5335, "train_speed(iter/s)": 0.027086 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.5, "completions/mean_length": 100.62500381469727, "completions/min_length": 42.5, "epoch": 7.951600893521966, "grad_norm": 0.0024682245054382067, "kl": 0.263671875, "learning_rate": 1.0192385794411479e-07, "loss": 0.0002636783756315708, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5336, "train_speed(iter/s)": 0.027084 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 229.25, "completions/mean_length": 96.58333587646484, "completions/min_length": 40.0, "epoch": 7.953090096798213, "grad_norm": 0.0031050969126153356, "kl": 0.279541015625, "learning_rate": 1.0178080054238542e-07, "loss": 0.00027916705585084856, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5337, "train_speed(iter/s)": 0.027084 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.25, "completions/mean_length": 108.16666984558105, "completions/min_length": 39.25, "epoch": 7.95457930007446, "grad_norm": 1.0466368213642507, "kl": 0.260498046875, "learning_rate": 1.0163783222967787e-07, "loss": -0.02069207653403282, "memory(GiB)": 112.53, "reward": 1.510416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.510416679084301, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5338, "train_speed(iter/s)": 0.027083 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.0, "completions/mean_length": 91.51041793823242, "completions/min_length": 31.5, "epoch": 7.956068503350707, "grad_norm": 0.0026093934255637967, "kl": 0.2763671875, "learning_rate": 1.0149495303797667e-07, "loss": 0.0002762569347396493, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5339, "train_speed(iter/s)": 0.027085 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 233.5, "completions/mean_length": 102.36458778381348, "completions/min_length": 38.5, "epoch": 7.957557706626955, "grad_norm": 0.00252852689926538, "kl": 0.253662109375, "learning_rate": 1.0135216299924637e-07, "loss": 0.00025340283173136413, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5340, "train_speed(iter/s)": 0.027085 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 258.0, "completions/mean_length": 105.03125381469727, "completions/min_length": 45.5, "epoch": 7.9590469099032015, "grad_norm": 0.0025239127142280045, "kl": 0.27880859375, "learning_rate": 1.0120946214543213e-07, "loss": 0.0002787195553537458, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5341, "train_speed(iter/s)": 0.027084 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 208.75, "completions/mean_length": 86.50000190734863, "completions/min_length": 36.5, "epoch": 7.960536113179449, "grad_norm": 0.0026495520787557752, "kl": 0.29150390625, "learning_rate": 1.0106685050845837e-07, "loss": 0.00029163225553929806, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5342, "train_speed(iter/s)": 0.027082 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 244.25, "completions/mean_length": 98.58333587646484, "completions/min_length": 42.25, "epoch": 7.962025316455696, "grad_norm": 0.0027120184704420703, "kl": 0.2841796875, "learning_rate": 1.0092432812023022e-07, "loss": 0.00028380900039337575, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5343, "train_speed(iter/s)": 0.027082 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 233.25, "completions/mean_length": 92.63541793823242, "completions/min_length": 36.5, "epoch": 7.963514519731944, "grad_norm": 0.0028824056584571454, "kl": 0.275146484375, "learning_rate": 1.0078189501263252e-07, "loss": 0.0002747914404608309, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5344, "train_speed(iter/s)": 0.027084 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.25, "completions/mean_length": 90.63541793823242, "completions/min_length": 39.25, "epoch": 7.9650037230081905, "grad_norm": 0.01868547461084011, "kl": 0.35546875, "learning_rate": 1.0063955121752998e-07, "loss": 0.00035564618883654475, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5345, "train_speed(iter/s)": 0.027086 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 263.5, "completions/mean_length": 103.96875381469727, "completions/min_length": 45.25, "epoch": 7.966492926284438, "grad_norm": 0.0031429939433847446, "kl": 0.25439453125, "learning_rate": 1.0049729676676794e-07, "loss": 0.00025419617304578424, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5346, "train_speed(iter/s)": 0.027086 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 247.5, "completions/mean_length": 106.36458587646484, "completions/min_length": 44.0, "epoch": 7.967982129560685, "grad_norm": 0.0025684252084393925, "kl": 0.248291015625, "learning_rate": 1.0035513169217113e-07, "loss": 0.0002483871066942811, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5347, "train_speed(iter/s)": 0.027086 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 317.5, "completions/mean_length": 111.44791984558105, "completions/min_length": 39.25, "epoch": 7.969471332836932, "grad_norm": 0.002271057311571933, "kl": 0.246826171875, "learning_rate": 1.0021305602554458e-07, "loss": 0.0002461874973960221, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5348, "train_speed(iter/s)": 0.027085 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.25, "completions/mean_length": 84.13541984558105, "completions/min_length": 44.0, "epoch": 7.9709605361131795, "grad_norm": 0.002790071383266435, "kl": 0.28515625, "learning_rate": 1.0007106979867325e-07, "loss": 0.0002855696657206863, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5349, "train_speed(iter/s)": 0.027086 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.0, "completions/mean_length": 93.39583587646484, "completions/min_length": 39.0, "epoch": 7.972449739389427, "grad_norm": 0.0024882087146960025, "kl": 0.266357421875, "learning_rate": 9.992917304332204e-08, "loss": 0.00026661501033231616, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5350, "train_speed(iter/s)": 0.027086 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 236.25, "completions/mean_length": 108.05208396911621, "completions/min_length": 49.25, "epoch": 7.973938942665674, "grad_norm": 0.010761780152242655, "kl": 0.26416015625, "learning_rate": 9.978736579123576e-08, "loss": 0.0002645889762789011, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5351, "train_speed(iter/s)": 0.027085 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 251.5, "completions/mean_length": 98.40625190734863, "completions/min_length": 39.0, "epoch": 7.975428145941921, "grad_norm": 0.0026541462146036873, "kl": 0.28564453125, "learning_rate": 9.964564807413961e-08, "loss": 0.0002858286607079208, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5352, "train_speed(iter/s)": 0.027084 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.25, "completions/mean_length": 99.61458587646484, "completions/min_length": 38.5, "epoch": 7.976917349218168, "grad_norm": 0.8505399139167732, "kl": 0.252685546875, "learning_rate": 9.950401992373836e-08, "loss": 0.002746454207226634, "memory(GiB)": 112.53, "reward": 1.9583333432674408, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.9583333432674408, "rewards/CineAccuracyORM/std": 0.09517337381839752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5353, "train_speed(iter/s)": 0.027085 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 237.5, "completions/mean_length": 100.65625190734863, "completions/min_length": 44.75, "epoch": 7.978406552494415, "grad_norm": 0.0028747872205869412, "kl": 0.24609375, "learning_rate": 9.936248137171682e-08, "loss": 0.00024629264953546226, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5354, "train_speed(iter/s)": 0.027086 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.75, "completions/mean_length": 101.32291793823242, "completions/min_length": 45.75, "epoch": 7.979895755770663, "grad_norm": 0.0026308187735842765, "kl": 0.2626953125, "learning_rate": 9.922103244973979e-08, "loss": 0.00026241812156513333, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5355, "train_speed(iter/s)": 0.027086 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.75, "completions/mean_length": 92.44791793823242, "completions/min_length": 41.75, "epoch": 7.98138495904691, "grad_norm": 0.5673180558856626, "kl": 0.291015625, "learning_rate": 9.907967318945188e-08, "loss": 0.018757561221718788, "memory(GiB)": 112.53, "reward": 1.7604166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166669771075, "rewards/CineAccuracyORM/std": 0.05103103443980217, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5356, "train_speed(iter/s)": 0.027086 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 261.5, "completions/mean_length": 109.43750190734863, "completions/min_length": 37.0, "epoch": 7.982874162323157, "grad_norm": 1.8938711847078968, "kl": 0.25, "learning_rate": 9.893840362247807e-08, "loss": -0.004422340542078018, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.833333358168602, "rewards/CineAccuracyORM/std": 0.2874932996928692, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5357, "train_speed(iter/s)": 0.027088 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 235.0, "completions/mean_length": 99.47916984558105, "completions/min_length": 31.75, "epoch": 7.984363365599404, "grad_norm": 2.0104019556923536, "kl": 0.267822265625, "learning_rate": 9.879722378042288e-08, "loss": 0.0006520537426695228, "memory(GiB)": 112.53, "reward": 1.8125000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5358, "train_speed(iter/s)": 0.027088 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.0, "completions/mean_length": 94.09375190734863, "completions/min_length": 39.5, "epoch": 7.985852568875652, "grad_norm": 3.593947137032461, "kl": 0.31591796875, "learning_rate": 9.86561336948708e-08, "loss": 0.0020683337934315205, "memory(GiB)": 112.53, "reward": 1.9375000298023224, "reward_std": 0.08311938121914864, "rewards/CineAccuracyORM/mean": 0.9375000149011612, "rewards/CineAccuracyORM/std": 0.16575583815574646, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5359, "train_speed(iter/s)": 0.027086 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.75, "completions/mean_length": 95.02083587646484, "completions/min_length": 31.75, "epoch": 7.987341772151899, "grad_norm": 0.002510637381453688, "kl": 0.271240234375, "learning_rate": 9.851513339738626e-08, "loss": 0.0002707802050281316, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5360, "train_speed(iter/s)": 0.027086 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 254.75, "completions/mean_length": 102.59375190734863, "completions/min_length": 31.75, "epoch": 7.9888309754281455, "grad_norm": 0.0024949751588018592, "kl": 0.257568359375, "learning_rate": 9.837422291951364e-08, "loss": 0.000257632666034624, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5361, "train_speed(iter/s)": 0.027086 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.5, "completions/mean_length": 97.44791984558105, "completions/min_length": 41.0, "epoch": 7.990320178704393, "grad_norm": 0.0027957820983694516, "kl": 0.278564453125, "learning_rate": 9.823340229277754e-08, "loss": 0.00027817307272925973, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5362, "train_speed(iter/s)": 0.027085 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 257.0, "completions/mean_length": 110.70833778381348, "completions/min_length": 42.0, "epoch": 7.991809381980641, "grad_norm": 0.00283591477774179, "kl": 0.246826171875, "learning_rate": 9.809267154868162e-08, "loss": 0.0002466103178448975, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5363, "train_speed(iter/s)": 0.027084 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.25, "completions/mean_length": 80.84375190734863, "completions/min_length": 34.0, "epoch": 7.993298585256888, "grad_norm": 0.002879713137474278, "kl": 0.3232421875, "learning_rate": 9.795203071871033e-08, "loss": 0.00032316576107405126, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5364, "train_speed(iter/s)": 0.027085 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 240.75, "completions/mean_length": 98.82291793823242, "completions/min_length": 42.0, "epoch": 7.994787788533134, "grad_norm": 0.0025954850145077026, "kl": 0.253173828125, "learning_rate": 9.781147983432758e-08, "loss": 0.00025316368555650115, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5365, "train_speed(iter/s)": 0.027085 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.5, "completions/mean_length": 88.90625190734863, "completions/min_length": 33.0, "epoch": 7.996276991809382, "grad_norm": 0.0026558285425607696, "kl": 0.28466796875, "learning_rate": 9.767101892697699e-08, "loss": 0.00028507522074505687, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5366, "train_speed(iter/s)": 0.027085 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 241.0, "completions/mean_length": 102.29166793823242, "completions/min_length": 43.75, "epoch": 7.997766195085629, "grad_norm": 0.002776316659193221, "kl": 0.263427734375, "learning_rate": 9.753064802808275e-08, "loss": 0.00026367121608927846, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5367, "train_speed(iter/s)": 0.027083 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.25, "completions/mean_length": 92.33333396911621, "completions/min_length": 39.75, "epoch": 7.999255398361877, "grad_norm": 0.002911558559408898, "kl": 0.269287109375, "learning_rate": 9.739036716904787e-08, "loss": 0.00026891345623880625, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5368, "train_speed(iter/s)": 0.027085 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 240.25, "completions/mean_length": 99.15625190734863, "completions/min_length": 36.0, "epoch": 8.001489203276247, "grad_norm": 0.0027139676223895013, "kl": 0.2763671875, "learning_rate": 9.72501763812561e-08, "loss": 0.00027663560467772186, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5369, "train_speed(iter/s)": 0.027084 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 235.25, "completions/mean_length": 97.36458587646484, "completions/min_length": 38.5, "epoch": 8.002978406552494, "grad_norm": 0.0027633806847369275, "kl": 0.26708984375, "learning_rate": 9.711007569607072e-08, "loss": 0.00026728527154773474, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5370, "train_speed(iter/s)": 0.027082 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.0, "completions/mean_length": 86.42708587646484, "completions/min_length": 41.0, "epoch": 8.004467609828742, "grad_norm": 0.0037013317061505165, "kl": 0.2900390625, "learning_rate": 9.697006514483464e-08, "loss": 0.0002891983895096928, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5371, "train_speed(iter/s)": 0.027082 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 259.75, "completions/mean_length": 98.31250381469727, "completions/min_length": 39.0, "epoch": 8.005956813104989, "grad_norm": 0.002730074324404949, "kl": 0.262939453125, "learning_rate": 9.683014475887125e-08, "loss": 0.00026332479319535196, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5372, "train_speed(iter/s)": 0.027082 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 291.25, "completions/mean_length": 110.94791793823242, "completions/min_length": 43.0, "epoch": 8.007446016381236, "grad_norm": 0.6990346939052501, "kl": 0.2548828125, "learning_rate": 9.669031456948279e-08, "loss": 0.012472741305828094, "memory(GiB)": 112.53, "reward": 1.760416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166865348816, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5373, "train_speed(iter/s)": 0.02708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 236.0, "completions/mean_length": 104.91666984558105, "completions/min_length": 37.25, "epoch": 8.008935219657483, "grad_norm": 0.0033850530224021785, "kl": 0.23779296875, "learning_rate": 9.655057460795234e-08, "loss": 0.00023761978081893176, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5374, "train_speed(iter/s)": 0.02708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 256.5, "completions/mean_length": 102.16666984558105, "completions/min_length": 35.0, "epoch": 8.010424422933731, "grad_norm": 0.9645439752264985, "kl": 0.269287109375, "learning_rate": 9.641092490554192e-08, "loss": -0.009121976792812347, "memory(GiB)": 112.53, "reward": 1.6562500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500074505806, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5375, "train_speed(iter/s)": 0.02708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.25, "completions/mean_length": 96.80208778381348, "completions/min_length": 35.5, "epoch": 8.011913626209978, "grad_norm": 0.0024454261522928815, "kl": 0.27294921875, "learning_rate": 9.627136549349408e-08, "loss": 0.00027258004411123693, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5376, "train_speed(iter/s)": 0.02708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.0, "completions/mean_length": 85.88541984558105, "completions/min_length": 33.25, "epoch": 8.013402829486225, "grad_norm": 0.002659985142604841, "kl": 0.29638671875, "learning_rate": 9.613189640303066e-08, "loss": 0.0002964609884656966, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5377, "train_speed(iter/s)": 0.027082 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 314.25, "completions/mean_length": 100.84375, "completions/min_length": 37.5, "epoch": 8.014892032762472, "grad_norm": 0.002437245979196543, "kl": 0.2705078125, "learning_rate": 9.599251766535343e-08, "loss": 0.0002703522623050958, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5378, "train_speed(iter/s)": 0.027081 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.75, "completions/mean_length": 87.11458587646484, "completions/min_length": 30.25, "epoch": 8.01638123603872, "grad_norm": 0.002852948238266075, "kl": 0.30029296875, "learning_rate": 9.585322931164441e-08, "loss": 0.0003004547324962914, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5379, "train_speed(iter/s)": 0.02708 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.0, "completions/mean_length": 91.10416793823242, "completions/min_length": 38.75, "epoch": 8.017870439314967, "grad_norm": 0.0028196126664917836, "kl": 0.288330078125, "learning_rate": 9.571403137306444e-08, "loss": 0.0002881234686356038, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5380, "train_speed(iter/s)": 0.027078 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 206.0, "completions/mean_length": 101.98958587646484, "completions/min_length": 41.25, "epoch": 8.019359642591214, "grad_norm": 0.0024807185939194057, "kl": 0.27001953125, "learning_rate": 9.5574923880755e-08, "loss": 0.0002692003035917878, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5381, "train_speed(iter/s)": 0.027077 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.75, "completions/mean_length": 99.92708587646484, "completions/min_length": 43.5, "epoch": 8.02084884586746, "grad_norm": 0.003301895795808469, "kl": 0.264404296875, "learning_rate": 9.543590686583697e-08, "loss": 0.0002647367655299604, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5382, "train_speed(iter/s)": 0.027078 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 237.0, "completions/mean_length": 98.75000190734863, "completions/min_length": 42.0, "epoch": 8.022338049143707, "grad_norm": 0.002668770609932982, "kl": 0.28173828125, "learning_rate": 9.529698035941092e-08, "loss": 0.0002816258929669857, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5383, "train_speed(iter/s)": 0.027078 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.75, "completions/mean_length": 95.13542175292969, "completions/min_length": 33.0, "epoch": 8.023827252419956, "grad_norm": 0.002517701363307001, "kl": 0.26416015625, "learning_rate": 9.51581443925576e-08, "loss": 0.00026446738047525287, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5384, "train_speed(iter/s)": 0.027076 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 280.25, "completions/mean_length": 101.85416984558105, "completions/min_length": 39.0, "epoch": 8.025316455696203, "grad_norm": 0.0027731967637401535, "kl": 0.28759765625, "learning_rate": 9.50193989963367e-08, "loss": 0.0002870276221074164, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5385, "train_speed(iter/s)": 0.027074 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 204.0, "completions/mean_length": 85.69791984558105, "completions/min_length": 44.25, "epoch": 8.02680565897245, "grad_norm": 0.0026803158637887657, "kl": 0.274169921875, "learning_rate": 9.48807442017886e-08, "loss": 0.00027406669687479734, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5386, "train_speed(iter/s)": 0.027073 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.5, "completions/mean_length": 96.16666984558105, "completions/min_length": 35.25, "epoch": 8.028294862248696, "grad_norm": 0.0026273551025594657, "kl": 0.259521484375, "learning_rate": 9.474218003993273e-08, "loss": 0.00026030215667560697, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5387, "train_speed(iter/s)": 0.027073 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 303.25, "completions/mean_length": 99.47916984558105, "completions/min_length": 37.5, "epoch": 8.029784065524945, "grad_norm": 0.0025753791211408617, "kl": 0.2861328125, "learning_rate": 9.460370654176846e-08, "loss": 0.0002862618421204388, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5388, "train_speed(iter/s)": 0.027072 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.25, "completions/mean_length": 96.46875190734863, "completions/min_length": 33.25, "epoch": 8.031273268801192, "grad_norm": 0.0026724988816049637, "kl": 0.270263671875, "learning_rate": 9.446532373827492e-08, "loss": 0.00027050942298956215, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5389, "train_speed(iter/s)": 0.027074 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.5, "completions/mean_length": 94.09375190734863, "completions/min_length": 39.0, "epoch": 8.032762472077438, "grad_norm": 0.0026763190746967153, "kl": 0.269287109375, "learning_rate": 9.432703166041084e-08, "loss": 0.000268894771579653, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5390, "train_speed(iter/s)": 0.027076 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 235.25, "completions/mean_length": 106.88541984558105, "completions/min_length": 31.0, "epoch": 8.034251675353685, "grad_norm": 0.48169161375560926, "kl": 0.2646484375, "learning_rate": 9.418883033911484e-08, "loss": 0.0018146319780498743, "memory(GiB)": 112.53, "reward": 1.822916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5391, "train_speed(iter/s)": 0.027073 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 240.5, "completions/mean_length": 100.06250190734863, "completions/min_length": 45.25, "epoch": 8.035740878629934, "grad_norm": 0.00326638509596104, "kl": 0.281982421875, "learning_rate": 9.40507198053051e-08, "loss": 0.00028182874666526914, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5392, "train_speed(iter/s)": 0.027071 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.25, "completions/mean_length": 98.81250381469727, "completions/min_length": 39.0, "epoch": 8.03723008190618, "grad_norm": 0.0024145928616848126, "kl": 0.282958984375, "learning_rate": 9.391270008987945e-08, "loss": 0.00028224504785612226, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5393, "train_speed(iter/s)": 0.027071 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.25, "completions/mean_length": 87.33333587646484, "completions/min_length": 38.0, "epoch": 8.038719285182427, "grad_norm": 0.002828127027767732, "kl": 0.2978515625, "learning_rate": 9.377477122371546e-08, "loss": 0.0002979140845127404, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5394, "train_speed(iter/s)": 0.027072 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.5, "completions/mean_length": 88.18750190734863, "completions/min_length": 44.0, "epoch": 8.040208488458674, "grad_norm": 0.0025907393120612222, "kl": 0.2919921875, "learning_rate": 9.363693323767035e-08, "loss": 0.000291919510345906, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5395, "train_speed(iter/s)": 0.027072 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.0, "completions/mean_length": 104.17708778381348, "completions/min_length": 40.25, "epoch": 8.041697691734921, "grad_norm": 0.004110232991665667, "kl": 0.26513671875, "learning_rate": 9.349918616258113e-08, "loss": 0.0002652726834639907, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5396, "train_speed(iter/s)": 0.027071 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 254.25, "completions/mean_length": 103.71875381469727, "completions/min_length": 39.0, "epoch": 8.04318689501117, "grad_norm": 0.002965718195706963, "kl": 0.27587890625, "learning_rate": 9.33615300292644e-08, "loss": 0.0002757275942713022, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5397, "train_speed(iter/s)": 0.027071 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.0, "completions/mean_length": 89.90625190734863, "completions/min_length": 37.75, "epoch": 8.044676098287416, "grad_norm": 0.002692838428503142, "kl": 0.28759765625, "learning_rate": 9.322396486851625e-08, "loss": 0.0002875438949558884, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5398, "train_speed(iter/s)": 0.027073 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 272.75, "completions/mean_length": 99.10417175292969, "completions/min_length": 48.5, "epoch": 8.046165301563663, "grad_norm": 0.00305846566841681, "kl": 0.2998046875, "learning_rate": 9.308649071111257e-08, "loss": 0.0002991257351823151, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5399, "train_speed(iter/s)": 0.027071 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 206.5, "completions/mean_length": 85.16666984558105, "completions/min_length": 38.75, "epoch": 8.04765450483991, "grad_norm": 0.0027915969064442703, "kl": 0.2900390625, "learning_rate": 9.294910758780917e-08, "loss": 0.0002901020925492048, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5400, "train_speed(iter/s)": 0.027072 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 234.25, "completions/mean_length": 91.42708587646484, "completions/min_length": 39.5, "epoch": 8.049143708116159, "grad_norm": 0.0032447644280336613, "kl": 0.286376953125, "learning_rate": 9.28118155293407e-08, "loss": 0.0002861910033971071, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5401, "train_speed(iter/s)": 0.027073 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 266.75, "completions/mean_length": 95.68750190734863, "completions/min_length": 33.75, "epoch": 8.050632911392405, "grad_norm": 0.0023438933756397893, "kl": 0.257080078125, "learning_rate": 9.267461456642234e-08, "loss": 0.00025705626467242837, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5402, "train_speed(iter/s)": 0.027074 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 266.0, "completions/mean_length": 105.53125190734863, "completions/min_length": 47.5, "epoch": 8.052122114668652, "grad_norm": 0.0022706382894671705, "kl": 0.271484375, "learning_rate": 9.253750472974836e-08, "loss": 0.00027134621632285416, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5403, "train_speed(iter/s)": 0.027074 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 229.5, "completions/mean_length": 97.80208587646484, "completions/min_length": 49.0, "epoch": 8.053611317944899, "grad_norm": 0.0028562395664593464, "kl": 0.272216796875, "learning_rate": 9.240048604999262e-08, "loss": 0.0002721041673794389, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5404, "train_speed(iter/s)": 0.027074 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.75, "completions/mean_length": 96.63541984558105, "completions/min_length": 44.0, "epoch": 8.055100521221148, "grad_norm": 0.00262802922885824, "kl": 0.286376953125, "learning_rate": 9.22635585578092e-08, "loss": 0.0002869990421459079, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5405, "train_speed(iter/s)": 0.027073 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 289.25, "completions/mean_length": 109.69791984558105, "completions/min_length": 40.75, "epoch": 8.056589724497394, "grad_norm": 1.374290227968693, "kl": 0.26416015625, "learning_rate": 9.212672228383072e-08, "loss": 0.013130305334925652, "memory(GiB)": 112.53, "reward": 1.760416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166865348816, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5406, "train_speed(iter/s)": 0.027071 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 214.5, "completions/mean_length": 99.54166793823242, "completions/min_length": 37.75, "epoch": 8.058078927773641, "grad_norm": 0.0024958859803433956, "kl": 0.277099609375, "learning_rate": 9.198997725867042e-08, "loss": 0.00027695027529262006, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5407, "train_speed(iter/s)": 0.027071 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 232.75, "completions/mean_length": 93.47916984558105, "completions/min_length": 35.25, "epoch": 8.059568131049888, "grad_norm": 0.0027325796547404075, "kl": 0.28515625, "learning_rate": 9.185332351292058e-08, "loss": 0.00028503526118583977, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5408, "train_speed(iter/s)": 0.02707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 159.5, "completions/mean_length": 80.8125, "completions/min_length": 38.25, "epoch": 8.061057334326135, "grad_norm": 0.0030227642078659244, "kl": 0.3056640625, "learning_rate": 9.171676107715303e-08, "loss": 0.00030547427013516426, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5409, "train_speed(iter/s)": 0.02707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.0, "completions/mean_length": 97.55208587646484, "completions/min_length": 34.75, "epoch": 8.062546537602383, "grad_norm": 2.190533132927808, "kl": 0.251953125, "learning_rate": 9.158028998191975e-08, "loss": -0.023910652846097946, "memory(GiB)": 112.53, "reward": 1.96875, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.96875, "rewards/CineAccuracyORM/std": 0.08445799350738525, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5410, "train_speed(iter/s)": 0.027072 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 225.75, "completions/mean_length": 88.14583587646484, "completions/min_length": 35.75, "epoch": 8.06403574087863, "grad_norm": 0.0026844135550327794, "kl": 0.29248046875, "learning_rate": 9.144391025775123e-08, "loss": 0.0002921759441960603, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5411, "train_speed(iter/s)": 0.027073 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.75, "completions/mean_length": 96.10416984558105, "completions/min_length": 33.5, "epoch": 8.065524944154877, "grad_norm": 0.0027075999223923437, "kl": 0.28515625, "learning_rate": 9.130762193515879e-08, "loss": 0.00028550298884510994, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5412, "train_speed(iter/s)": 0.027072 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.25, "completions/mean_length": 84.48958396911621, "completions/min_length": 35.0, "epoch": 8.067014147431124, "grad_norm": 0.0029553092976939907, "kl": 0.3076171875, "learning_rate": 9.117142504463204e-08, "loss": 0.00030737422639504075, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5413, "train_speed(iter/s)": 0.027072 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 317.25, "completions/mean_length": 109.12500190734863, "completions/min_length": 38.75, "epoch": 8.068503350707372, "grad_norm": 0.002405903595812715, "kl": 0.253173828125, "learning_rate": 9.103531961664118e-08, "loss": 0.0002526089665479958, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5414, "train_speed(iter/s)": 0.027071 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 235.0, "completions/mean_length": 96.22916984558105, "completions/min_length": 40.5, "epoch": 8.069992553983619, "grad_norm": 0.0025229953747890046, "kl": 0.271728515625, "learning_rate": 9.089930568163545e-08, "loss": 0.00027121283346787095, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5415, "train_speed(iter/s)": 0.02707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 214.5, "completions/mean_length": 94.91666984558105, "completions/min_length": 32.75, "epoch": 8.071481757259866, "grad_norm": 0.0025450954513653165, "kl": 0.255859375, "learning_rate": 9.076338327004346e-08, "loss": 0.00025561501388438046, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5416, "train_speed(iter/s)": 0.027071 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 247.5, "completions/mean_length": 100.75000190734863, "completions/min_length": 45.0, "epoch": 8.072970960536113, "grad_norm": 0.003292147582958293, "kl": 0.2666015625, "learning_rate": 9.062755241227399e-08, "loss": 0.0002655601711012423, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5417, "train_speed(iter/s)": 0.027073 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 254.5, "completions/mean_length": 106.76041793823242, "completions/min_length": 42.5, "epoch": 8.074460163812361, "grad_norm": 0.0026339314013411295, "kl": 0.25830078125, "learning_rate": 9.04918131387144e-08, "loss": 0.00025847138022072613, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5418, "train_speed(iter/s)": 0.027073 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 236.25, "completions/mean_length": 102.78125381469727, "completions/min_length": 35.75, "epoch": 8.075949367088608, "grad_norm": 0.0023109229619340883, "kl": 0.259033203125, "learning_rate": 9.035616547973251e-08, "loss": 0.00025888142408803105, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5419, "train_speed(iter/s)": 0.027072 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 246.5, "completions/mean_length": 101.3750057220459, "completions/min_length": 36.75, "epoch": 8.077438570364855, "grad_norm": 0.004381232429231289, "kl": 0.291015625, "learning_rate": 9.022060946567511e-08, "loss": 0.0002909594331867993, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5420, "train_speed(iter/s)": 0.027073 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 301.25, "completions/mean_length": 110.58333778381348, "completions/min_length": 42.0, "epoch": 8.078927773641102, "grad_norm": 0.008332365439546774, "kl": 0.2802734375, "learning_rate": 9.008514512686838e-08, "loss": 0.00028024293715134263, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5421, "train_speed(iter/s)": 0.027071 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 280.0, "completions/mean_length": 101.94791984558105, "completions/min_length": 36.0, "epoch": 8.080416976917348, "grad_norm": 0.9385296116015355, "kl": 0.25927734375, "learning_rate": 8.994977249361863e-08, "loss": 0.0035873902961611748, "memory(GiB)": 112.53, "reward": 1.791666716337204, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7916666865348816, "rewards/CineAccuracyORM/std": 0.33594508469104767, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5422, "train_speed(iter/s)": 0.027071 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 257.25, "completions/mean_length": 94.01041984558105, "completions/min_length": 40.5, "epoch": 8.081906180193597, "grad_norm": 0.0027534216853932207, "kl": 0.27880859375, "learning_rate": 8.981449159621074e-08, "loss": 0.0002781833754852414, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5423, "train_speed(iter/s)": 0.02707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 278.0, "completions/mean_length": 104.55208778381348, "completions/min_length": 37.25, "epoch": 8.083395383469844, "grad_norm": 0.0022972122778421927, "kl": 0.25537109375, "learning_rate": 8.96793024649099e-08, "loss": 0.00025548521080054343, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5424, "train_speed(iter/s)": 0.027069 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 366.25, "completions/mean_length": 96.15625190734863, "completions/min_length": 37.25, "epoch": 8.08488458674609, "grad_norm": 0.002591057318337017, "kl": 0.263671875, "learning_rate": 8.954420512996036e-08, "loss": 0.00026338023599237204, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5425, "train_speed(iter/s)": 0.027069 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 279.25, "completions/mean_length": 107.93750190734863, "completions/min_length": 48.5, "epoch": 8.086373790022337, "grad_norm": 1.6704458010659058, "kl": 0.245849609375, "learning_rate": 8.940919962158583e-08, "loss": 0.017139453440904617, "memory(GiB)": 112.53, "reward": 1.7187500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500149011612, "rewards/CineAccuracyORM/std": 0.4456155598163605, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5426, "train_speed(iter/s)": 0.027066 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 263.75, "completions/mean_length": 91.63541984558105, "completions/min_length": 41.25, "epoch": 8.087862993298586, "grad_norm": 0.0025647187495363394, "kl": 0.2958984375, "learning_rate": 8.927428596998954e-08, "loss": 0.00029569899197667837, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5427, "train_speed(iter/s)": 0.027067 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 246.75, "completions/mean_length": 96.22916793823242, "completions/min_length": 38.25, "epoch": 8.089352196574833, "grad_norm": 1.9418009246791688, "kl": 0.2685546875, "learning_rate": 8.913946420535412e-08, "loss": -0.012695821933448315, "memory(GiB)": 112.53, "reward": 1.6250000298023224, "reward_std": 0.0725918859243393, "rewards/CineAccuracyORM/mean": 0.6250000074505806, "rewards/CineAccuracyORM/std": 0.36846019327640533, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5428, "train_speed(iter/s)": 0.027068 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.75, "completions/mean_length": 100.04166793823242, "completions/min_length": 34.25, "epoch": 8.09084139985108, "grad_norm": 0.5712825028762126, "kl": 0.258544921875, "learning_rate": 8.900473435784194e-08, "loss": -0.00603180006146431, "memory(GiB)": 112.53, "reward": 1.9270833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9270833432674408, "rewards/CineAccuracyORM/std": 0.11607640981674194, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5429, "train_speed(iter/s)": 0.027068 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.0, "completions/mean_length": 98.87500190734863, "completions/min_length": 33.25, "epoch": 8.092330603127326, "grad_norm": 0.0026744574549573304, "kl": 0.258544921875, "learning_rate": 8.887009645759441e-08, "loss": 0.0002591127413325012, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5430, "train_speed(iter/s)": 0.02707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 243.75, "completions/mean_length": 107.69791793823242, "completions/min_length": 38.25, "epoch": 8.093819806403575, "grad_norm": 0.9074476501762349, "kl": 0.241455078125, "learning_rate": 8.873555053473253e-08, "loss": 0.027377169579267502, "memory(GiB)": 112.53, "reward": 1.8750000298023224, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.8750000149011612, "rewards/CineAccuracyORM/std": 0.2155592292547226, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5431, "train_speed(iter/s)": 0.027067 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 281.25, "completions/mean_length": 106.14583587646484, "completions/min_length": 41.25, "epoch": 8.095309009679822, "grad_norm": 0.6580572612489701, "kl": 0.266845703125, "learning_rate": 8.860109661935672e-08, "loss": -0.008185158483684063, "memory(GiB)": 112.53, "reward": 1.8125000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5432, "train_speed(iter/s)": 0.027065 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 195.5, "completions/mean_length": 96.54166984558105, "completions/min_length": 41.25, "epoch": 8.096798212956068, "grad_norm": 0.0024728461165291022, "kl": 0.27685546875, "learning_rate": 8.846673474154665e-08, "loss": 0.0002768085978459567, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5433, "train_speed(iter/s)": 0.027065 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.5, "completions/mean_length": 94.72916793823242, "completions/min_length": 34.5, "epoch": 8.098287416232315, "grad_norm": 0.002807651681767832, "kl": 0.283203125, "learning_rate": 8.833246493136187e-08, "loss": 0.00028276455122977495, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5434, "train_speed(iter/s)": 0.027067 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.25, "completions/mean_length": 100.06250381469727, "completions/min_length": 37.75, "epoch": 8.099776619508562, "grad_norm": 0.0026228312641800484, "kl": 0.268310546875, "learning_rate": 8.819828721884093e-08, "loss": 0.00026822611107490957, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5435, "train_speed(iter/s)": 0.027067 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.75, "completions/mean_length": 99.61458587646484, "completions/min_length": 38.25, "epoch": 8.10126582278481, "grad_norm": 0.0025550624471656863, "kl": 0.249755859375, "learning_rate": 8.806420163400181e-08, "loss": 0.0002496654342394322, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5436, "train_speed(iter/s)": 0.027067 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.25, "completions/mean_length": 85.91666793823242, "completions/min_length": 41.0, "epoch": 8.102755026061057, "grad_norm": 1.1443520859373306, "kl": 0.28564453125, "learning_rate": 8.7930208206842e-08, "loss": 0.006231904029846191, "memory(GiB)": 112.53, "reward": 1.7812500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7812500149011612, "rewards/CineAccuracyORM/std": 0.34448449313640594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5437, "train_speed(iter/s)": 0.027067 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 237.25, "completions/mean_length": 95.73958587646484, "completions/min_length": 39.0, "epoch": 8.104244229337304, "grad_norm": 0.002438791358499575, "kl": 0.275390625, "learning_rate": 8.77963069673382e-08, "loss": 0.0002753351582214236, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5438, "train_speed(iter/s)": 0.027067 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 240.5, "completions/mean_length": 100.03125190734863, "completions/min_length": 41.5, "epoch": 8.105733432613551, "grad_norm": 1.297886402960746, "kl": 0.26513671875, "learning_rate": 8.76624979454466e-08, "loss": -0.020564574748277664, "memory(GiB)": 112.53, "reward": 1.760416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166865348816, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5439, "train_speed(iter/s)": 0.027067 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 194.0, "completions/mean_length": 90.12500190734863, "completions/min_length": 33.75, "epoch": 8.1072226358898, "grad_norm": 0.013482848247963135, "kl": 0.28662109375, "learning_rate": 8.752878117110296e-08, "loss": 0.0002869143499992788, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5440, "train_speed(iter/s)": 0.027067 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 255.25, "completions/mean_length": 95.87500190734863, "completions/min_length": 38.0, "epoch": 8.108711839166046, "grad_norm": 0.0036605673490676706, "kl": 0.27587890625, "learning_rate": 8.73951566742221e-08, "loss": 0.00027596246218308806, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5441, "train_speed(iter/s)": 0.027068 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.75, "completions/mean_length": 89.15625381469727, "completions/min_length": 37.25, "epoch": 8.110201042442293, "grad_norm": 0.003274649519180731, "kl": 0.30126953125, "learning_rate": 8.726162448469826e-08, "loss": 0.00030083174351602793, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5442, "train_speed(iter/s)": 0.027066 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.0, "completions/mean_length": 88.22916984558105, "completions/min_length": 41.5, "epoch": 8.11169024571854, "grad_norm": 0.002839971123809981, "kl": 0.29833984375, "learning_rate": 8.712818463240513e-08, "loss": 0.00029826577519997954, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5443, "train_speed(iter/s)": 0.027064 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.0, "completions/mean_length": 94.53125190734863, "completions/min_length": 33.0, "epoch": 8.113179448994789, "grad_norm": 2.071973456403394, "kl": 0.277099609375, "learning_rate": 8.699483714719546e-08, "loss": 0.0015388932079076767, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5444, "train_speed(iter/s)": 0.027065 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 202.5, "completions/mean_length": 104.04166793823242, "completions/min_length": 38.75, "epoch": 8.114668652271035, "grad_norm": 1.4800538489226969, "kl": 0.322998046875, "learning_rate": 8.686158205890192e-08, "loss": -0.02113386057317257, "memory(GiB)": 112.53, "reward": 1.7812500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7812500149011612, "rewards/CineAccuracyORM/std": 0.34448449313640594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5445, "train_speed(iter/s)": 0.027066 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.25, "completions/mean_length": 89.08333587646484, "completions/min_length": 36.25, "epoch": 8.116157855547282, "grad_norm": 2.1001981336744153, "kl": 0.353759765625, "learning_rate": 8.672841939733593e-08, "loss": 0.018023867160081863, "memory(GiB)": 112.53, "reward": 1.8125000298023224, "reward_std": 0.12766291946172714, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.3261406943202019, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5446, "train_speed(iter/s)": 0.027064 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 241.75, "completions/mean_length": 100.02083778381348, "completions/min_length": 41.0, "epoch": 8.117647058823529, "grad_norm": 0.0027084675665945456, "kl": 0.27294921875, "learning_rate": 8.659534919228844e-08, "loss": 0.0002726803068071604, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5447, "train_speed(iter/s)": 0.027063 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.75, "completions/mean_length": 96.47916984558105, "completions/min_length": 44.0, "epoch": 8.119136262099776, "grad_norm": 0.5998725174279157, "kl": 0.30712890625, "learning_rate": 8.64623714735298e-08, "loss": -0.007402792572975159, "memory(GiB)": 112.53, "reward": 1.8854166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.20484384894371033, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5448, "train_speed(iter/s)": 0.027062 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 288.75, "completions/mean_length": 95.71875190734863, "completions/min_length": 40.0, "epoch": 8.120625465376024, "grad_norm": 0.0027772084928063, "kl": 0.287109375, "learning_rate": 8.632948627080944e-08, "loss": 0.00028701196424663067, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5449, "train_speed(iter/s)": 0.027062 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 248.75, "completions/mean_length": 96.82291984558105, "completions/min_length": 39.25, "epoch": 8.122114668652271, "grad_norm": 0.0024271639668632166, "kl": 0.26953125, "learning_rate": 8.619669361385662e-08, "loss": 0.00026953715132549405, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5450, "train_speed(iter/s)": 0.027062 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 274.5, "completions/mean_length": 107.54166793823242, "completions/min_length": 46.5, "epoch": 8.123603871928518, "grad_norm": 0.009706286645169503, "kl": 0.257080078125, "learning_rate": 8.606399353237897e-08, "loss": 0.0002568193303886801, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5451, "train_speed(iter/s)": 0.02706 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.25, "completions/mean_length": 79.48958587646484, "completions/min_length": 31.75, "epoch": 8.125093075204765, "grad_norm": 0.002879289146425095, "kl": 0.330078125, "learning_rate": 8.593138605606442e-08, "loss": 0.0003302955301478505, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5452, "train_speed(iter/s)": 0.027061 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 272.25, "completions/mean_length": 98.29166984558105, "completions/min_length": 38.5, "epoch": 8.126582278481013, "grad_norm": 0.002678169404406366, "kl": 0.281005859375, "learning_rate": 8.579887121457951e-08, "loss": 0.00028013912378810346, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5453, "train_speed(iter/s)": 0.027061 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 237.25, "completions/mean_length": 90.37500381469727, "completions/min_length": 44.0, "epoch": 8.12807148175726, "grad_norm": 0.9334435460783054, "kl": 0.272705078125, "learning_rate": 8.56664490375702e-08, "loss": -0.017323998734354973, "memory(GiB)": 112.53, "reward": 1.6458333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6458333432674408, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5454, "train_speed(iter/s)": 0.027061 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 201.0, "completions/mean_length": 90.77083587646484, "completions/min_length": 32.0, "epoch": 8.129560685033507, "grad_norm": 0.002682741074057376, "kl": 0.266845703125, "learning_rate": 8.553411955466211e-08, "loss": 0.0002668502856977284, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5455, "train_speed(iter/s)": 0.027063 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 271.25, "completions/mean_length": 102.73958587646484, "completions/min_length": 45.5, "epoch": 8.131049888309754, "grad_norm": 0.0037457378605320014, "kl": 0.26611328125, "learning_rate": 8.54018827954594e-08, "loss": 0.00026654667453840375, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5456, "train_speed(iter/s)": 0.027064 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 271.25, "completions/mean_length": 102.80208587646484, "completions/min_length": 42.0, "epoch": 8.132539091586002, "grad_norm": 0.002402139785291689, "kl": 0.244873046875, "learning_rate": 8.526973878954618e-08, "loss": 0.0002445820136927068, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5457, "train_speed(iter/s)": 0.027064 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.75, "completions/mean_length": 101.13541984558105, "completions/min_length": 42.25, "epoch": 8.134028294862249, "grad_norm": 0.0025674785866228443, "kl": 0.26318359375, "learning_rate": 8.513768756648537e-08, "loss": 0.0002636833814904094, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5458, "train_speed(iter/s)": 0.027064 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 284.0, "completions/mean_length": 101.14583587646484, "completions/min_length": 44.75, "epoch": 8.135517498138496, "grad_norm": 0.7838095103383959, "kl": 0.2578125, "learning_rate": 8.500572915581921e-08, "loss": -0.005866519641131163, "memory(GiB)": 112.53, "reward": 1.7395834028720856, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.739583358168602, "rewards/CineAccuracyORM/std": 0.4121886007487774, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5459, "train_speed(iter/s)": 0.027065 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.25, "completions/mean_length": 94.92708587646484, "completions/min_length": 42.0, "epoch": 8.137006701414743, "grad_norm": 0.0028576154573352656, "kl": 0.26025390625, "learning_rate": 8.487386358706966e-08, "loss": 0.0002604247711133212, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5460, "train_speed(iter/s)": 0.027067 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 277.0, "completions/mean_length": 103.73958396911621, "completions/min_length": 36.75, "epoch": 8.13849590469099, "grad_norm": 0.002460859002536389, "kl": 0.26708984375, "learning_rate": 8.474209088973688e-08, "loss": 0.00026711737154982984, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5461, "train_speed(iter/s)": 0.027065 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.25, "completions/mean_length": 96.13541793823242, "completions/min_length": 38.0, "epoch": 8.139985107967238, "grad_norm": 0.923100814103181, "kl": 0.268798828125, "learning_rate": 8.46104110933013e-08, "loss": -0.002055415650829673, "memory(GiB)": 112.53, "reward": 1.8437500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5462, "train_speed(iter/s)": 0.027064 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 260.75, "completions/mean_length": 103.63542175292969, "completions/min_length": 40.25, "epoch": 8.141474311243485, "grad_norm": 0.0032894103109207843, "kl": 0.28466796875, "learning_rate": 8.447882422722197e-08, "loss": 0.0002848822623491287, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5463, "train_speed(iter/s)": 0.027064 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.75, "completions/mean_length": 89.28125190734863, "completions/min_length": 33.0, "epoch": 8.142963514519732, "grad_norm": 0.004996881803334138, "kl": 0.2900390625, "learning_rate": 8.434733032093738e-08, "loss": 0.00029055116465315223, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5464, "train_speed(iter/s)": 0.027061 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.25, "completions/mean_length": 100.87500190734863, "completions/min_length": 30.75, "epoch": 8.144452717795978, "grad_norm": 0.0023749685960848654, "kl": 0.255859375, "learning_rate": 8.421592940386512e-08, "loss": 0.000255649967584759, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5465, "train_speed(iter/s)": 0.027063 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.75, "completions/mean_length": 94.79166793823242, "completions/min_length": 40.25, "epoch": 8.145941921072227, "grad_norm": 0.020346773736747883, "kl": 0.27978515625, "learning_rate": 8.40846215054019e-08, "loss": 0.00027973539545200765, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5466, "train_speed(iter/s)": 0.027063 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.0, "completions/mean_length": 102.12500381469727, "completions/min_length": 46.0, "epoch": 8.147431124348474, "grad_norm": 0.00298467103052778, "kl": 0.2705078125, "learning_rate": 8.395340665492401e-08, "loss": 0.00027068040799349546, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5467, "train_speed(iter/s)": 0.027065 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 240.5, "completions/mean_length": 93.86458587646484, "completions/min_length": 39.75, "epoch": 8.14892032762472, "grad_norm": 0.0026991833987062867, "kl": 0.2919921875, "learning_rate": 8.382228488178638e-08, "loss": 0.0002921349951066077, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5468, "train_speed(iter/s)": 0.027063 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 263.5, "completions/mean_length": 93.67708778381348, "completions/min_length": 36.5, "epoch": 8.150409530900967, "grad_norm": 0.9992856413176954, "kl": 0.29345703125, "learning_rate": 8.369125621532358e-08, "loss": 0.0035407086834311485, "memory(GiB)": 112.53, "reward": 1.6145833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6145833432674408, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5469, "train_speed(iter/s)": 0.027064 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.25, "completions/mean_length": 91.45833396911621, "completions/min_length": 36.5, "epoch": 8.151898734177216, "grad_norm": 0.0027445555550256935, "kl": 0.298828125, "learning_rate": 8.356032068484903e-08, "loss": 0.00029901470406912267, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5470, "train_speed(iter/s)": 0.027064 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.5, "completions/mean_length": 87.43750381469727, "completions/min_length": 39.75, "epoch": 8.153387937453463, "grad_norm": 0.002894343637553365, "kl": 0.2861328125, "learning_rate": 8.342947831965536e-08, "loss": 0.00028586655389517546, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5471, "train_speed(iter/s)": 0.027065 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.0, "completions/mean_length": 91.98958396911621, "completions/min_length": 40.0, "epoch": 8.15487714072971, "grad_norm": 0.003943555170475684, "kl": 0.28466796875, "learning_rate": 8.329872914901465e-08, "loss": 0.0002846795250661671, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5472, "train_speed(iter/s)": 0.027064 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 257.5, "completions/mean_length": 97.67708587646484, "completions/min_length": 42.0, "epoch": 8.156366344005956, "grad_norm": 1.118682392174517, "kl": 0.27294921875, "learning_rate": 8.316807320217779e-08, "loss": 0.022594820708036423, "memory(GiB)": 112.53, "reward": 1.9479166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.9479166716337204, "rewards/CineAccuracyORM/std": 0.1037127822637558, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5473, "train_speed(iter/s)": 0.027064 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.75, "completions/mean_length": 101.27083587646484, "completions/min_length": 40.75, "epoch": 8.157855547282203, "grad_norm": 0.002095940991634972, "kl": 0.26953125, "learning_rate": 8.303751050837499e-08, "loss": 0.0002690872352104634, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5474, "train_speed(iter/s)": 0.027064 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 282.5, "completions/mean_length": 98.94791984558105, "completions/min_length": 35.5, "epoch": 8.159344750558452, "grad_norm": 0.0026800251942181914, "kl": 0.29052734375, "learning_rate": 8.290704109681546e-08, "loss": 0.00029044956318102777, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5475, "train_speed(iter/s)": 0.027064 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 233.5, "completions/mean_length": 98.70833396911621, "completions/min_length": 35.25, "epoch": 8.160833953834699, "grad_norm": 0.0027931895474249406, "kl": 0.2802734375, "learning_rate": 8.277666499668773e-08, "loss": 0.00028024372295476496, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5476, "train_speed(iter/s)": 0.027065 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 255.25, "completions/mean_length": 99.66666793823242, "completions/min_length": 30.75, "epoch": 8.162323157110945, "grad_norm": 1.7005683400832217, "kl": 0.249755859375, "learning_rate": 8.264638223715913e-08, "loss": -0.006308205425739288, "memory(GiB)": 112.53, "reward": 1.6979166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6979166716337204, "rewards/CineAccuracyORM/std": 0.1037127822637558, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5477, "train_speed(iter/s)": 0.027064 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 247.0, "completions/mean_length": 98.41666793823242, "completions/min_length": 32.5, "epoch": 8.163812360387192, "grad_norm": 0.00282524766087782, "kl": 0.26318359375, "learning_rate": 8.251619284737665e-08, "loss": 0.00026306582731194794, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5478, "train_speed(iter/s)": 0.027066 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 223.0, "completions/mean_length": 100.45833587646484, "completions/min_length": 39.0, "epoch": 8.16530156366344, "grad_norm": 0.0025069930413948983, "kl": 0.25048828125, "learning_rate": 8.238609685646591e-08, "loss": 0.000250607990892604, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5479, "train_speed(iter/s)": 0.027066 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 181.25, "completions/mean_length": 90.35416984558105, "completions/min_length": 41.25, "epoch": 8.166790766939688, "grad_norm": 0.002632425251244051, "kl": 0.2890625, "learning_rate": 8.225609429353186e-08, "loss": 0.00028913310961797833, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5480, "train_speed(iter/s)": 0.027067 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 186.5, "completions/mean_length": 88.50000190734863, "completions/min_length": 33.0, "epoch": 8.168279970215934, "grad_norm": 0.0027339795903059947, "kl": 0.28369140625, "learning_rate": 8.212618518765835e-08, "loss": 0.0002837942447513342, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5481, "train_speed(iter/s)": 0.027067 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.75, "completions/mean_length": 96.14583587646484, "completions/min_length": 38.75, "epoch": 8.169769173492181, "grad_norm": 0.002508305497366888, "kl": 0.28125, "learning_rate": 8.19963695679085e-08, "loss": 0.0002806357806548476, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5482, "train_speed(iter/s)": 0.027068 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.0, "completions/mean_length": 89.48958587646484, "completions/min_length": 49.0, "epoch": 8.17125837676843, "grad_norm": 0.002850386319577178, "kl": 0.2919921875, "learning_rate": 8.186664746332455e-08, "loss": 0.00029192917281761765, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5483, "train_speed(iter/s)": 0.027069 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 248.75, "completions/mean_length": 91.80208587646484, "completions/min_length": 37.5, "epoch": 8.172747580044676, "grad_norm": 0.0026899964160576003, "kl": 0.2763671875, "learning_rate": 8.173701890292778e-08, "loss": 0.00027636863524094224, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5484, "train_speed(iter/s)": 0.027069 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 243.5, "completions/mean_length": 96.54166984558105, "completions/min_length": 34.75, "epoch": 8.174236783320923, "grad_norm": 1.5182244722954463, "kl": 0.2841796875, "learning_rate": 8.160748391571843e-08, "loss": 0.007157423999160528, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5485, "train_speed(iter/s)": 0.02707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 195.75, "completions/mean_length": 90.91666793823242, "completions/min_length": 40.5, "epoch": 8.17572598659717, "grad_norm": 0.0028618992404773093, "kl": 0.2744140625, "learning_rate": 8.14780425306758e-08, "loss": 0.0002744130324572325, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5486, "train_speed(iter/s)": 0.027071 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.75, "completions/mean_length": 95.28125190734863, "completions/min_length": 33.0, "epoch": 8.177215189873417, "grad_norm": 0.0025668074211766256, "kl": 0.2783203125, "learning_rate": 8.134869477675831e-08, "loss": 0.0002781245275400579, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5487, "train_speed(iter/s)": 0.027069 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 249.0, "completions/mean_length": 103.03125381469727, "completions/min_length": 36.25, "epoch": 8.178704393149665, "grad_norm": 0.0032134475460316664, "kl": 0.263671875, "learning_rate": 8.121944068290381e-08, "loss": 0.0002637691795825958, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5488, "train_speed(iter/s)": 0.02707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.0, "completions/mean_length": 96.06250381469727, "completions/min_length": 41.0, "epoch": 8.180193596425912, "grad_norm": 0.002495058924122653, "kl": 0.2685546875, "learning_rate": 8.109028027802834e-08, "loss": 0.0002684196806512773, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5489, "train_speed(iter/s)": 0.027071 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.25, "completions/mean_length": 86.25000381469727, "completions/min_length": 36.25, "epoch": 8.181682799702159, "grad_norm": 0.003037821271419059, "kl": 0.2958984375, "learning_rate": 8.096121359102786e-08, "loss": 0.0002960729761980474, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5490, "train_speed(iter/s)": 0.027071 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 261.25, "completions/mean_length": 104.57291793823242, "completions/min_length": 39.5, "epoch": 8.183172002978406, "grad_norm": 0.0025464562884956656, "kl": 0.2568359375, "learning_rate": 8.083224065077677e-08, "loss": 0.0002569371135905385, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5491, "train_speed(iter/s)": 0.027071 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 206.5, "completions/mean_length": 96.53125190734863, "completions/min_length": 42.0, "epoch": 8.184661206254654, "grad_norm": 0.0027718875244477966, "kl": 0.26953125, "learning_rate": 8.070336148612872e-08, "loss": 0.000269779353402555, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5492, "train_speed(iter/s)": 0.027073 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.25, "completions/mean_length": 94.52083396911621, "completions/min_length": 34.0, "epoch": 8.186150409530901, "grad_norm": 0.002400852822771894, "kl": 0.296875, "learning_rate": 8.057457612591661e-08, "loss": 0.00029696233104914427, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5493, "train_speed(iter/s)": 0.027074 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 186.0, "completions/mean_length": 87.15625190734863, "completions/min_length": 30.75, "epoch": 8.187639612807148, "grad_norm": 0.002595795661253943, "kl": 0.3037109375, "learning_rate": 8.044588459895168e-08, "loss": 0.00030342635000124574, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5494, "train_speed(iter/s)": 0.027075 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.25, "completions/mean_length": 99.28125190734863, "completions/min_length": 40.75, "epoch": 8.189128816083395, "grad_norm": 0.6885895476014046, "kl": 0.3955078125, "learning_rate": 8.031728693402501e-08, "loss": 0.012528089806437492, "memory(GiB)": 112.53, "reward": 1.8125000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5495, "train_speed(iter/s)": 0.027073 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.25, "completions/mean_length": 85.36458587646484, "completions/min_length": 34.0, "epoch": 8.190618019359643, "grad_norm": 2.1784087623540502, "kl": 0.33154296875, "learning_rate": 8.018878315990619e-08, "loss": 0.007382351439446211, "memory(GiB)": 112.53, "reward": 1.7812500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7812500149011612, "rewards/CineAccuracyORM/std": 0.34448449313640594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5496, "train_speed(iter/s)": 0.027069 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 195.75, "completions/mean_length": 86.95833587646484, "completions/min_length": 35.75, "epoch": 8.19210722263589, "grad_norm": 0.0027140452911045753, "kl": 0.287841796875, "learning_rate": 8.006037330534366e-08, "loss": 0.0002878929954022169, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5497, "train_speed(iter/s)": 0.02707 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.75, "completions/mean_length": 90.67708587646484, "completions/min_length": 32.5, "epoch": 8.193596425912137, "grad_norm": 1.5986615458699291, "kl": 0.281982421875, "learning_rate": 7.99320573990655e-08, "loss": 0.008288050070405006, "memory(GiB)": 112.53, "reward": 1.8541666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5498, "train_speed(iter/s)": 0.027071 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 234.0, "completions/mean_length": 92.05208396911621, "completions/min_length": 35.25, "epoch": 8.195085629188384, "grad_norm": 0.003215043977970953, "kl": 0.30126953125, "learning_rate": 7.980383546977787e-08, "loss": 0.000301083957310766, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5499, "train_speed(iter/s)": 0.027071 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 278.25, "completions/mean_length": 94.95833396911621, "completions/min_length": 34.75, "epoch": 8.19657483246463, "grad_norm": 0.002355254406605128, "kl": 0.268798828125, "learning_rate": 7.967570754616692e-08, "loss": 0.00026868816348724067, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5500, "train_speed(iter/s)": 0.027071 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 208.25, "completions/mean_length": 90.51041984558105, "completions/min_length": 36.0, "epoch": 8.19806403574088, "grad_norm": 0.0024958152519211903, "kl": 0.30859375, "learning_rate": 7.954767365689675e-08, "loss": 0.00030881224665790796, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5501, "train_speed(iter/s)": 0.027064 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 231.0, "completions/mean_length": 94.83333587646484, "completions/min_length": 41.25, "epoch": 8.199553239017126, "grad_norm": 1.4767921410939087, "kl": 0.285888671875, "learning_rate": 7.941973383061123e-08, "loss": 0.003458046354353428, "memory(GiB)": 112.53, "reward": 1.6562500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500149011612, "rewards/CineAccuracyORM/std": 0.4847914054989815, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5502, "train_speed(iter/s)": 0.027063 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 234.25, "completions/mean_length": 99.30208587646484, "completions/min_length": 37.0, "epoch": 8.201042442293373, "grad_norm": 0.002384879714224797, "kl": 0.28759765625, "learning_rate": 7.929188809593279e-08, "loss": 0.00028749104239977896, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5503, "train_speed(iter/s)": 0.027063 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 238.0, "completions/mean_length": 96.51041984558105, "completions/min_length": 31.75, "epoch": 8.20253164556962, "grad_norm": 0.004083956478748064, "kl": 0.2939453125, "learning_rate": 7.91641364814628e-08, "loss": 0.0002939140540547669, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5504, "train_speed(iter/s)": 0.027062 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.75, "completions/mean_length": 103.85416793823242, "completions/min_length": 46.5, "epoch": 8.204020848845868, "grad_norm": 0.0028424130763369227, "kl": 0.28759765625, "learning_rate": 7.903647901578181e-08, "loss": 0.0002873336197808385, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5505, "train_speed(iter/s)": 0.027059 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 293.0, "completions/mean_length": 107.02083587646484, "completions/min_length": 42.5, "epoch": 8.205510052122115, "grad_norm": 0.0026042244006580103, "kl": 0.265625, "learning_rate": 7.890891572744912e-08, "loss": 0.0002659304882399738, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5506, "train_speed(iter/s)": 0.027057 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 291.25, "completions/mean_length": 114.83333778381348, "completions/min_length": 36.25, "epoch": 8.206999255398362, "grad_norm": 0.0024385847155278966, "kl": 0.247802734375, "learning_rate": 7.878144664500303e-08, "loss": 0.0002478132664691657, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5507, "train_speed(iter/s)": 0.027055 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 234.25, "completions/mean_length": 103.23958587646484, "completions/min_length": 41.0, "epoch": 8.208488458674609, "grad_norm": 0.00249893341562154, "kl": 0.242431640625, "learning_rate": 7.865407179696065e-08, "loss": 0.00024224596563726664, "memory(GiB)": 112.53, "reward": 1.5000000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5508, "train_speed(iter/s)": 0.027054 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.75, "completions/mean_length": 94.55208587646484, "completions/min_length": 32.75, "epoch": 8.209977661950857, "grad_norm": 0.0026331247107787394, "kl": 0.258056640625, "learning_rate": 7.852679121181804e-08, "loss": 0.0002579306601546705, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5509, "train_speed(iter/s)": 0.027053 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.75, "completions/mean_length": 79.86458587646484, "completions/min_length": 32.75, "epoch": 8.211466865227104, "grad_norm": 0.0028628625310284017, "kl": 0.31298828125, "learning_rate": 7.839960491805047e-08, "loss": 0.00031290538026951253, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5510, "train_speed(iter/s)": 0.027054 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 240.25, "completions/mean_length": 93.10416984558105, "completions/min_length": 30.75, "epoch": 8.21295606850335, "grad_norm": 0.0031207769550156386, "kl": 0.30419921875, "learning_rate": 7.827251294411169e-08, "loss": 0.00030359876109287143, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5511, "train_speed(iter/s)": 0.027053 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 285.25, "completions/mean_length": 110.63542175292969, "completions/min_length": 45.5, "epoch": 8.214445271779597, "grad_norm": 0.002589355204111071, "kl": 0.25634765625, "learning_rate": 7.81455153184346e-08, "loss": 0.0002558846608735621, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5512, "train_speed(iter/s)": 0.027051 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 261.25, "completions/mean_length": 97.28125381469727, "completions/min_length": 40.75, "epoch": 8.215934475055844, "grad_norm": 0.0023794568545481456, "kl": 0.27294921875, "learning_rate": 7.80186120694309e-08, "loss": 0.0002726631937548518, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5513, "train_speed(iter/s)": 0.02705 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 238.5, "completions/mean_length": 93.06250381469727, "completions/min_length": 42.25, "epoch": 8.217423678332093, "grad_norm": 2.0658125693497333, "kl": 0.29443359375, "learning_rate": 7.789180322549116e-08, "loss": -0.009593216702342033, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5514, "train_speed(iter/s)": 0.027049 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.25, "completions/mean_length": 97.79166793823242, "completions/min_length": 36.75, "epoch": 8.21891288160834, "grad_norm": 0.026187682590491385, "kl": 0.2919921875, "learning_rate": 7.776508881498483e-08, "loss": 0.0002915640943683684, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5515, "train_speed(iter/s)": 0.027049 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 168.5, "completions/mean_length": 82.90625190734863, "completions/min_length": 36.5, "epoch": 8.220402084884586, "grad_norm": 0.0030942968162758493, "kl": 0.30712890625, "learning_rate": 7.763846886626046e-08, "loss": 0.00030763758695684373, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5516, "train_speed(iter/s)": 0.02705 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 206.0, "completions/mean_length": 95.17708587646484, "completions/min_length": 36.75, "epoch": 8.221891288160833, "grad_norm": 1.2550781461147134, "kl": 0.267333984375, "learning_rate": 7.751194340764522e-08, "loss": 0.004813713952898979, "memory(GiB)": 112.53, "reward": 1.7395834028720856, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.739583358168602, "rewards/CineAccuracyORM/std": 0.4121886007487774, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5517, "train_speed(iter/s)": 0.02705 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 229.75, "completions/mean_length": 90.78125190734863, "completions/min_length": 34.25, "epoch": 8.223380491437082, "grad_norm": 0.0030670913965125516, "kl": 0.28857421875, "learning_rate": 7.738551246744513e-08, "loss": 0.0002887671289499849, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5518, "train_speed(iter/s)": 0.027052 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 280.25, "completions/mean_length": 108.84375190734863, "completions/min_length": 33.5, "epoch": 8.224869694713329, "grad_norm": 0.0024056899071759922, "kl": 0.260498046875, "learning_rate": 7.725917607394511e-08, "loss": 0.0002604518085718155, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5519, "train_speed(iter/s)": 0.027052 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 392.75, "completions/mean_length": 103.71875190734863, "completions/min_length": 36.5, "epoch": 8.226358897989575, "grad_norm": 0.0024119291506683703, "kl": 0.277587890625, "learning_rate": 7.713293425540896e-08, "loss": 0.00027731654699891806, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5520, "train_speed(iter/s)": 0.027052 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 231.5, "completions/mean_length": 98.03125190734863, "completions/min_length": 37.75, "epoch": 8.227848101265822, "grad_norm": 0.0024462284159429562, "kl": 0.26318359375, "learning_rate": 7.700678704007946e-08, "loss": 0.00026296349824406207, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5521, "train_speed(iter/s)": 0.027052 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 287.75, "completions/mean_length": 91.26041984558105, "completions/min_length": 41.5, "epoch": 8.22933730454207, "grad_norm": 0.00342500840951209, "kl": 0.280517578125, "learning_rate": 7.688073445617798e-08, "loss": 0.000280793261481449, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5522, "train_speed(iter/s)": 0.027052 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 240.0, "completions/mean_length": 87.39583587646484, "completions/min_length": 32.0, "epoch": 8.230826507818318, "grad_norm": 0.0028515124498044945, "kl": 0.28125, "learning_rate": 7.675477653190482e-08, "loss": 0.00028122332878410816, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5523, "train_speed(iter/s)": 0.027051 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.75, "completions/mean_length": 82.09375190734863, "completions/min_length": 37.75, "epoch": 8.232315711094564, "grad_norm": 0.002627223790218814, "kl": 0.309814453125, "learning_rate": 7.662891329543913e-08, "loss": 0.00030952764791436493, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5524, "train_speed(iter/s)": 0.02705 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 240.25, "completions/mean_length": 106.84375381469727, "completions/min_length": 40.5, "epoch": 8.233804914370811, "grad_norm": 0.002555683212379497, "kl": 0.276123046875, "learning_rate": 7.650314477493874e-08, "loss": 0.0002760471252258867, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5525, "train_speed(iter/s)": 0.02705 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.25, "completions/mean_length": 90.16666984558105, "completions/min_length": 34.0, "epoch": 8.235294117647058, "grad_norm": 0.0027833148141958204, "kl": 0.274658203125, "learning_rate": 7.637747099854069e-08, "loss": 0.0002745218516793102, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5526, "train_speed(iter/s)": 0.02705 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 214.75, "completions/mean_length": 83.73958778381348, "completions/min_length": 33.75, "epoch": 8.236783320923307, "grad_norm": 0.029236807862969356, "kl": 0.287841796875, "learning_rate": 7.62518919943601e-08, "loss": 0.000287259288597852, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5527, "train_speed(iter/s)": 0.027051 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.5, "completions/mean_length": 92.72916984558105, "completions/min_length": 38.0, "epoch": 8.238272524199553, "grad_norm": 0.002757158339370521, "kl": 0.27001953125, "learning_rate": 7.612640779049173e-08, "loss": 0.0002699613105505705, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5528, "train_speed(iter/s)": 0.02705 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 208.75, "completions/mean_length": 93.48958587646484, "completions/min_length": 37.0, "epoch": 8.2397617274758, "grad_norm": 0.9726221262605325, "kl": 0.30322265625, "learning_rate": 7.600101841500855e-08, "loss": -0.001936592161655426, "memory(GiB)": 112.53, "reward": 1.9895833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9895833432674408, "rewards/CineAccuracyORM/std": 0.05103103443980217, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5529, "train_speed(iter/s)": 0.027051 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.0, "completions/mean_length": 89.23958587646484, "completions/min_length": 40.75, "epoch": 8.241250930752047, "grad_norm": 0.0026756435943693915, "kl": 0.2763671875, "learning_rate": 7.587572389596236e-08, "loss": 0.00027629482792690396, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5530, "train_speed(iter/s)": 0.027052 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 233.5, "completions/mean_length": 93.48958587646484, "completions/min_length": 38.0, "epoch": 8.242740134028296, "grad_norm": 0.0032369546846633787, "kl": 0.28173828125, "learning_rate": 7.575052426138422e-08, "loss": 0.00028216963983140886, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5531, "train_speed(iter/s)": 0.027051 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.0, "completions/mean_length": 93.92708778381348, "completions/min_length": 39.5, "epoch": 8.244229337304542, "grad_norm": 0.0028558865517495304, "kl": 0.2744140625, "learning_rate": 7.562541953928314e-08, "loss": 0.00027452275389805436, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5532, "train_speed(iter/s)": 0.027053 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 199.5, "completions/mean_length": 85.93750190734863, "completions/min_length": 36.75, "epoch": 8.24571854058079, "grad_norm": 0.003770245468148353, "kl": 0.28662109375, "learning_rate": 7.550040975764782e-08, "loss": 0.00028706860030069947, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5533, "train_speed(iter/s)": 0.027053 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 214.25, "completions/mean_length": 99.58333587646484, "completions/min_length": 37.5, "epoch": 8.247207743857036, "grad_norm": 0.6977480866543492, "kl": 0.26708984375, "learning_rate": 7.537549494444501e-08, "loss": -0.005173941608518362, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833507180214, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5534, "train_speed(iter/s)": 0.027054 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 194.75, "completions/mean_length": 88.63541793823242, "completions/min_length": 31.25, "epoch": 8.248696947133284, "grad_norm": 0.0024870791228786848, "kl": 0.28955078125, "learning_rate": 7.52506751276204e-08, "loss": 0.00028903444763273, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5535, "train_speed(iter/s)": 0.027056 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 233.0, "completions/mean_length": 102.57291984558105, "completions/min_length": 43.5, "epoch": 8.250186150409531, "grad_norm": 0.0023160083257836046, "kl": 0.25634765625, "learning_rate": 7.512595033509888e-08, "loss": 0.00025665349676273763, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5536, "train_speed(iter/s)": 0.027055 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 229.0, "completions/mean_length": 105.57291984558105, "completions/min_length": 38.5, "epoch": 8.251675353685778, "grad_norm": 0.0026947049059826912, "kl": 0.23828125, "learning_rate": 7.500132059478326e-08, "loss": 0.00023820844944566488, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5537, "train_speed(iter/s)": 0.027055 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.5, "completions/mean_length": 98.23958778381348, "completions/min_length": 40.5, "epoch": 8.253164556962025, "grad_norm": 0.002655557480327839, "kl": 0.27587890625, "learning_rate": 7.487678593455587e-08, "loss": 0.0002759342605713755, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5538, "train_speed(iter/s)": 0.027057 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.25, "completions/mean_length": 93.40625, "completions/min_length": 41.0, "epoch": 8.254653760238272, "grad_norm": 0.002547507617091419, "kl": 0.28759765625, "learning_rate": 7.475234638227706e-08, "loss": 0.00028761420981027186, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5539, "train_speed(iter/s)": 0.027058 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 235.25, "completions/mean_length": 91.67708587646484, "completions/min_length": 39.25, "epoch": 8.25614296351452, "grad_norm": 0.0028202798552225804, "kl": 0.282470703125, "learning_rate": 7.462800196578661e-08, "loss": 0.0002822811948135495, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5540, "train_speed(iter/s)": 0.027059 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.25, "completions/mean_length": 88.36458587646484, "completions/min_length": 33.75, "epoch": 8.257632166790767, "grad_norm": 0.0026581122907211193, "kl": 0.29150390625, "learning_rate": 7.450375271290249e-08, "loss": 0.0002918136597145349, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5541, "train_speed(iter/s)": 0.027059 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 204.25, "completions/mean_length": 85.70833587646484, "completions/min_length": 31.0, "epoch": 8.259121370067014, "grad_norm": 1.5793510259372052, "kl": 0.3095703125, "learning_rate": 7.437959865142152e-08, "loss": 0.02561575174331665, "memory(GiB)": 112.53, "reward": 1.8125000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5542, "train_speed(iter/s)": 0.027058 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 236.25, "completions/mean_length": 102.54166793823242, "completions/min_length": 35.5, "epoch": 8.26061057334326, "grad_norm": 0.0023068039354289358, "kl": 0.2548828125, "learning_rate": 7.425553980911959e-08, "loss": 0.0002549980999901891, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5543, "train_speed(iter/s)": 0.027056 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 252.25, "completions/mean_length": 102.41666984558105, "completions/min_length": 35.5, "epoch": 8.26209977661951, "grad_norm": 0.0024079608804825916, "kl": 0.2705078125, "learning_rate": 7.413157621375049e-08, "loss": 0.0002704601502045989, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5544, "train_speed(iter/s)": 0.027053 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.0, "completions/mean_length": 96.28125381469727, "completions/min_length": 48.0, "epoch": 8.263588979895756, "grad_norm": 0.003857428078028663, "kl": 0.291015625, "learning_rate": 7.400770789304755e-08, "loss": 0.0002903662098105997, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5545, "train_speed(iter/s)": 0.027055 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 247.0, "completions/mean_length": 100.58333587646484, "completions/min_length": 40.0, "epoch": 8.265078183172003, "grad_norm": 0.0025517714962297157, "kl": 0.2509765625, "learning_rate": 7.388393487472222e-08, "loss": 0.0002504680014681071, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5546, "train_speed(iter/s)": 0.027055 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 232.0, "completions/mean_length": 96.66666793823242, "completions/min_length": 41.5, "epoch": 8.26656738644825, "grad_norm": 0.0023553583296280186, "kl": 0.27099609375, "learning_rate": 7.376025718646484e-08, "loss": 0.00027105672052130103, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5547, "train_speed(iter/s)": 0.027056 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 279.25, "completions/mean_length": 94.90625190734863, "completions/min_length": 44.75, "epoch": 8.268056589724498, "grad_norm": 0.0027876194635253187, "kl": 0.2744140625, "learning_rate": 7.36366748559446e-08, "loss": 0.0002748273254837841, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5548, "train_speed(iter/s)": 0.027055 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.75, "completions/mean_length": 81.42708587646484, "completions/min_length": 36.5, "epoch": 8.269545793000745, "grad_norm": 0.00273619641837441, "kl": 0.30419921875, "learning_rate": 7.351318791080879e-08, "loss": 0.0003045794437639415, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5549, "train_speed(iter/s)": 0.027057 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.0, "completions/mean_length": 90.31250190734863, "completions/min_length": 36.5, "epoch": 8.271034996276992, "grad_norm": 0.002772108779308857, "kl": 0.283935546875, "learning_rate": 7.338979637868404e-08, "loss": 0.0002840389497578144, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5550, "train_speed(iter/s)": 0.027058 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.75, "completions/mean_length": 87.63541793823242, "completions/min_length": 44.25, "epoch": 8.272524199553239, "grad_norm": 0.003956811835733305, "kl": 0.29931640625, "learning_rate": 7.326650028717523e-08, "loss": 0.00029902326059527695, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5551, "train_speed(iter/s)": 0.027057 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.0, "completions/mean_length": 83.33333778381348, "completions/min_length": 39.5, "epoch": 8.274013402829485, "grad_norm": 0.0027084482967574446, "kl": 0.29931640625, "learning_rate": 7.314329966386596e-08, "loss": 0.00029968988383188844, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5552, "train_speed(iter/s)": 0.027059 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.5, "completions/mean_length": 105.50000190734863, "completions/min_length": 39.5, "epoch": 8.275502606105734, "grad_norm": 0.002631990080265016, "kl": 0.260009765625, "learning_rate": 7.302019453631853e-08, "loss": 0.00025969927082769573, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5553, "train_speed(iter/s)": 0.027059 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 241.25, "completions/mean_length": 90.80208587646484, "completions/min_length": 37.75, "epoch": 8.27699180938198, "grad_norm": 0.003579210890428898, "kl": 0.3017578125, "learning_rate": 7.289718493207369e-08, "loss": 0.0003020745643880218, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5554, "train_speed(iter/s)": 0.027055 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.0, "completions/mean_length": 84.70833587646484, "completions/min_length": 41.25, "epoch": 8.278481012658228, "grad_norm": 0.0027560957909688426, "kl": 0.3017578125, "learning_rate": 7.277427087865123e-08, "loss": 0.00030218958272598684, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5555, "train_speed(iter/s)": 0.027056 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 234.5, "completions/mean_length": 91.41666984558105, "completions/min_length": 37.25, "epoch": 8.279970215934474, "grad_norm": 0.00245798127008219, "kl": 0.29248046875, "learning_rate": 7.26514524035492e-08, "loss": 0.0002917287638410926, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5556, "train_speed(iter/s)": 0.027056 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 235.5, "completions/mean_length": 97.94791984558105, "completions/min_length": 41.5, "epoch": 8.281459419210723, "grad_norm": 0.6001716247673095, "kl": 0.277587890625, "learning_rate": 7.252872953424427e-08, "loss": 0.010036464780569077, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5557, "train_speed(iter/s)": 0.027056 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 258.5, "completions/mean_length": 100.05208587646484, "completions/min_length": 45.0, "epoch": 8.28294862248697, "grad_norm": 0.0026289164263518425, "kl": 0.2919921875, "learning_rate": 7.240610229819195e-08, "loss": 0.0002920440165325999, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5558, "train_speed(iter/s)": 0.027055 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 180.5, "completions/mean_length": 88.55208587646484, "completions/min_length": 40.25, "epoch": 8.284437825763217, "grad_norm": 0.03087085446868528, "kl": 0.298828125, "learning_rate": 7.228357072282609e-08, "loss": 0.00029892317252233624, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5559, "train_speed(iter/s)": 0.027056 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 232.25, "completions/mean_length": 102.36458587646484, "completions/min_length": 36.0, "epoch": 8.285927029039463, "grad_norm": 0.00257530098859152, "kl": 0.255126953125, "learning_rate": 7.216113483555941e-08, "loss": 0.0002554544189479202, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5560, "train_speed(iter/s)": 0.027056 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.25, "completions/mean_length": 92.84375381469727, "completions/min_length": 31.0, "epoch": 8.287416232315712, "grad_norm": 0.0023677778487752182, "kl": 0.28515625, "learning_rate": 7.20387946637831e-08, "loss": 0.0002852292964234948, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5561, "train_speed(iter/s)": 0.027055 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.5, "completions/mean_length": 96.28125, "completions/min_length": 46.0, "epoch": 8.288905435591959, "grad_norm": 0.0026804295668226463, "kl": 0.27783203125, "learning_rate": 7.191655023486681e-08, "loss": 0.0002784220559988171, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5562, "train_speed(iter/s)": 0.027055 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 247.75, "completions/mean_length": 98.28125381469727, "completions/min_length": 35.75, "epoch": 8.290394638868205, "grad_norm": 0.0027620610947943283, "kl": 0.261474609375, "learning_rate": 7.179440157615885e-08, "loss": 0.0002616267593111843, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5563, "train_speed(iter/s)": 0.027054 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.0, "completions/mean_length": 81.66666984558105, "completions/min_length": 31.0, "epoch": 8.291883842144452, "grad_norm": 0.022118434113556545, "kl": 0.353515625, "learning_rate": 7.167234871498645e-08, "loss": 0.00035402586217969656, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5564, "train_speed(iter/s)": 0.027055 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.25, "completions/mean_length": 90.72916793823242, "completions/min_length": 31.5, "epoch": 8.293373045420699, "grad_norm": 1.3232286879104036, "kl": 0.2841796875, "learning_rate": 7.155039167865467e-08, "loss": 0.004078437574207783, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5565, "train_speed(iter/s)": 0.027056 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.0, "completions/mean_length": 94.59375381469727, "completions/min_length": 34.25, "epoch": 8.294862248696948, "grad_norm": 0.0025163472360983613, "kl": 0.277099609375, "learning_rate": 7.142853049444786e-08, "loss": 0.0002769955317489803, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5566, "train_speed(iter/s)": 0.027057 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.0, "completions/mean_length": 93.63541984558105, "completions/min_length": 36.5, "epoch": 8.296351451973194, "grad_norm": 0.002585722854963007, "kl": 0.263671875, "learning_rate": 7.130676518962859e-08, "loss": 0.0002634970878716558, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5567, "train_speed(iter/s)": 0.027056 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 243.25, "completions/mean_length": 100.43750381469727, "completions/min_length": 42.0, "epoch": 8.297840655249441, "grad_norm": 0.006099018748283733, "kl": 0.2822265625, "learning_rate": 7.118509579143783e-08, "loss": 0.00028236024081707, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5568, "train_speed(iter/s)": 0.027055 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 257.0, "completions/mean_length": 109.58333587646484, "completions/min_length": 38.25, "epoch": 8.299329858525688, "grad_norm": 0.0023596620694015273, "kl": 0.254638671875, "learning_rate": 7.106352232709561e-08, "loss": 0.0002546719624660909, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5569, "train_speed(iter/s)": 0.027055 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.5, "completions/mean_length": 100.86458587646484, "completions/min_length": 37.25, "epoch": 8.300819061801937, "grad_norm": 0.9951921972024919, "kl": 0.257080078125, "learning_rate": 7.094204482379984e-08, "loss": 0.01000373438000679, "memory(GiB)": 112.53, "reward": 1.8437500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5570, "train_speed(iter/s)": 0.027054 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 208.25, "completions/mean_length": 93.51041984558105, "completions/min_length": 44.0, "epoch": 8.302308265078183, "grad_norm": 0.003428458756077892, "kl": 0.283203125, "learning_rate": 7.082066330872743e-08, "loss": 0.00028345000464469194, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5571, "train_speed(iter/s)": 0.027056 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 246.75, "completions/mean_length": 93.50000190734863, "completions/min_length": 45.5, "epoch": 8.30379746835443, "grad_norm": 0.00297491049271362, "kl": 0.27685546875, "learning_rate": 7.069937780903379e-08, "loss": 0.00027728950954042375, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5572, "train_speed(iter/s)": 0.027056 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.0, "completions/mean_length": 99.40625190734863, "completions/min_length": 37.5, "epoch": 8.305286671630677, "grad_norm": 0.0025687940637023655, "kl": 0.265869140625, "learning_rate": 7.057818835185242e-08, "loss": 0.0002662026963662356, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5573, "train_speed(iter/s)": 0.027055 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.0, "completions/mean_length": 90.33333587646484, "completions/min_length": 36.5, "epoch": 8.306775874906926, "grad_norm": 0.0025014826396884634, "kl": 0.253173828125, "learning_rate": 7.045709496429614e-08, "loss": 0.00025274435756728053, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5574, "train_speed(iter/s)": 0.027055 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 272.5, "completions/mean_length": 105.73958587646484, "completions/min_length": 38.75, "epoch": 8.308265078183172, "grad_norm": 0.002230105442401754, "kl": 0.24755859375, "learning_rate": 7.033609767345522e-08, "loss": 0.00024778975057415664, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5575, "train_speed(iter/s)": 0.027056 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.75, "completions/mean_length": 91.11458587646484, "completions/min_length": 38.25, "epoch": 8.30975428145942, "grad_norm": 0.0026139177918563755, "kl": 0.283203125, "learning_rate": 7.021519650639951e-08, "loss": 0.0002831994788721204, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5576, "train_speed(iter/s)": 0.027054 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 238.75, "completions/mean_length": 90.05208587646484, "completions/min_length": 39.25, "epoch": 8.311243484735666, "grad_norm": 0.0025374895240706566, "kl": 0.27392578125, "learning_rate": 7.009439149017643e-08, "loss": 0.00027376748039387167, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5577, "train_speed(iter/s)": 0.027054 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 250.0, "completions/mean_length": 100.02083778381348, "completions/min_length": 36.5, "epoch": 8.312732688011913, "grad_norm": 0.002434266981775622, "kl": 0.26708984375, "learning_rate": 6.997368265181253e-08, "loss": 0.00026687333593145013, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5578, "train_speed(iter/s)": 0.027053 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 185.25, "completions/mean_length": 87.85416984558105, "completions/min_length": 43.25, "epoch": 8.314221891288161, "grad_norm": 0.0028469826088397027, "kl": 0.283447265625, "learning_rate": 6.985307001831264e-08, "loss": 0.0002830260491464287, "memory(GiB)": 112.53, "reward": 1.416666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.416666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5579, "train_speed(iter/s)": 0.027053 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.5, "completions/mean_length": 100.72916984558105, "completions/min_length": 40.25, "epoch": 8.315711094564408, "grad_norm": 0.0030247132097188964, "kl": 0.25390625, "learning_rate": 6.973255361665981e-08, "loss": 0.0002537299878895283, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5580, "train_speed(iter/s)": 0.027052 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 247.5, "completions/mean_length": 94.43750190734863, "completions/min_length": 36.5, "epoch": 8.317200297840655, "grad_norm": 0.00315225354925402, "kl": 0.273681640625, "learning_rate": 6.961213347381622e-08, "loss": 0.0002734450390562415, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5581, "train_speed(iter/s)": 0.027053 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.75, "completions/mean_length": 94.13541793823242, "completions/min_length": 43.5, "epoch": 8.318689501116902, "grad_norm": 0.0027495212369149755, "kl": 0.26318359375, "learning_rate": 6.949180961672157e-08, "loss": 0.0002629213558975607, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5582, "train_speed(iter/s)": 0.027053 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 234.75, "completions/mean_length": 96.23958587646484, "completions/min_length": 40.0, "epoch": 8.32017870439315, "grad_norm": 0.0025486959300738054, "kl": 0.27392578125, "learning_rate": 6.937158207229494e-08, "loss": 0.00027395502547733486, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5583, "train_speed(iter/s)": 0.027054 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 243.0, "completions/mean_length": 99.57291984558105, "completions/min_length": 39.5, "epoch": 8.321667907669397, "grad_norm": 0.002659026425292629, "kl": 0.24658203125, "learning_rate": 6.925145086743328e-08, "loss": 0.0002465310681145638, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5584, "train_speed(iter/s)": 0.027055 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 235.75, "completions/mean_length": 100.51041984558105, "completions/min_length": 41.25, "epoch": 8.323157110945644, "grad_norm": 0.002337267045773726, "kl": 0.2490234375, "learning_rate": 6.913141602901212e-08, "loss": 0.00024947532801888883, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5585, "train_speed(iter/s)": 0.027054 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 246.25, "completions/mean_length": 104.77083587646484, "completions/min_length": 48.5, "epoch": 8.32464631422189, "grad_norm": 0.002541263117433111, "kl": 0.2548828125, "learning_rate": 6.901147758388576e-08, "loss": 0.0002550540666561574, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5586, "train_speed(iter/s)": 0.027056 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 214.5, "completions/mean_length": 91.40625190734863, "completions/min_length": 44.25, "epoch": 8.32613551749814, "grad_norm": 0.0025237508439639755, "kl": 0.2763671875, "learning_rate": 6.889163555888628e-08, "loss": 0.0002766087418422103, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5587, "train_speed(iter/s)": 0.027055 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 303.75, "completions/mean_length": 114.18750381469727, "completions/min_length": 45.0, "epoch": 8.327624720774386, "grad_norm": 0.0025679572431548964, "kl": 0.2421875, "learning_rate": 6.877188998082484e-08, "loss": 0.00024232517171185464, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5588, "train_speed(iter/s)": 0.027051 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.25, "completions/mean_length": 98.34375190734863, "completions/min_length": 40.25, "epoch": 8.329113924050633, "grad_norm": 0.002433498173270766, "kl": 0.275390625, "learning_rate": 6.865224087649063e-08, "loss": 0.00027523981407284737, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5589, "train_speed(iter/s)": 0.02705 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 184.5, "completions/mean_length": 88.31250190734863, "completions/min_length": 31.5, "epoch": 8.33060312732688, "grad_norm": 0.002692797810202356, "kl": 0.268798828125, "learning_rate": 6.853268827265141e-08, "loss": 0.00026864794199354947, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5590, "train_speed(iter/s)": 0.027049 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 236.5, "completions/mean_length": 99.03125381469727, "completions/min_length": 44.25, "epoch": 8.332092330603126, "grad_norm": 0.0030094494566502926, "kl": 0.2802734375, "learning_rate": 6.841323219605333e-08, "loss": 0.00028014759300276637, "memory(GiB)": 112.53, "reward": 1.4166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.4166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5591, "train_speed(iter/s)": 0.02705 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.25, "completions/mean_length": 86.29166793823242, "completions/min_length": 42.25, "epoch": 8.333581533879375, "grad_norm": 0.002639374814796946, "kl": 0.28125, "learning_rate": 6.829387267342073e-08, "loss": 0.0002808824647217989, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5592, "train_speed(iter/s)": 0.02705 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 223.75, "completions/mean_length": 95.57291984558105, "completions/min_length": 40.25, "epoch": 8.335070737155622, "grad_norm": 0.002269519243849166, "kl": 0.26513671875, "learning_rate": 6.817460973145689e-08, "loss": 0.00026516607613302767, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5593, "train_speed(iter/s)": 0.02705 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.75, "completions/mean_length": 89.78125381469727, "completions/min_length": 43.25, "epoch": 8.336559940431869, "grad_norm": 0.006642602576120289, "kl": 0.30810546875, "learning_rate": 6.805544339684293e-08, "loss": 0.0003081034810747951, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5594, "train_speed(iter/s)": 0.02705 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 194.75, "completions/mean_length": 91.86458587646484, "completions/min_length": 39.75, "epoch": 8.338049143708115, "grad_norm": 0.00273186066293992, "kl": 0.2705078125, "learning_rate": 6.793637369623867e-08, "loss": 0.00027072380180470645, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5595, "train_speed(iter/s)": 0.027049 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 237.25, "completions/mean_length": 91.73958587646484, "completions/min_length": 36.0, "epoch": 8.339538346984364, "grad_norm": 3.1728570436014523, "kl": 0.26904296875, "learning_rate": 6.78174006562821e-08, "loss": -0.03473342955112457, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.31460215896368027, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5596, "train_speed(iter/s)": 0.027049 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 311.5, "completions/mean_length": 95.71875190734863, "completions/min_length": 35.0, "epoch": 8.34102755026061, "grad_norm": 0.0023683229546378626, "kl": 0.275390625, "learning_rate": 6.76985243035897e-08, "loss": 0.0002754027955234051, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5597, "train_speed(iter/s)": 0.02705 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.75, "completions/mean_length": 89.10416984558105, "completions/min_length": 32.75, "epoch": 8.342516753536858, "grad_norm": 0.9209193433207843, "kl": 0.28759765625, "learning_rate": 6.75797446647564e-08, "loss": 0.015055525116622448, "memory(GiB)": 112.53, "reward": 1.7291667461395264, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7291666865348816, "rewards/CineAccuracyORM/std": 0.43174003064632416, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5598, "train_speed(iter/s)": 0.02705 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 267.75, "completions/mean_length": 93.13541984558105, "completions/min_length": 32.75, "epoch": 8.344005956813104, "grad_norm": 0.0027188789274577743, "kl": 0.281005859375, "learning_rate": 6.746106176635541e-08, "loss": 0.0002804263203870505, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5599, "train_speed(iter/s)": 0.027046 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 264.0, "completions/mean_length": 91.14583587646484, "completions/min_length": 40.25, "epoch": 8.345495160089353, "grad_norm": 0.005256112452376686, "kl": 0.27734375, "learning_rate": 6.734247563493828e-08, "loss": 0.0002768936101347208, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5600, "train_speed(iter/s)": 0.027046 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 233.25, "completions/mean_length": 99.46875190734863, "completions/min_length": 47.5, "epoch": 8.3469843633656, "grad_norm": 0.002354587457729239, "kl": 0.27294921875, "learning_rate": 6.72239862970349e-08, "loss": 0.00027273697196505964, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5601, "train_speed(iter/s)": 0.027044 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.0, "completions/mean_length": 90.79166984558105, "completions/min_length": 41.75, "epoch": 8.348473566641847, "grad_norm": 0.0026826531316452543, "kl": 0.2724609375, "learning_rate": 6.710559377915354e-08, "loss": 0.0002719183685258031, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5602, "train_speed(iter/s)": 0.027046 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.5, "completions/mean_length": 100.37500190734863, "completions/min_length": 38.75, "epoch": 8.349962769918093, "grad_norm": 0.0026924479803233986, "kl": 0.260498046875, "learning_rate": 6.698729810778064e-08, "loss": 0.00026076645008288324, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5603, "train_speed(iter/s)": 0.027047 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.0, "completions/mean_length": 88.72916793823242, "completions/min_length": 31.5, "epoch": 8.35145197319434, "grad_norm": 0.006644453156568112, "kl": 0.27587890625, "learning_rate": 6.686909930938145e-08, "loss": 0.0002752910659182817, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5604, "train_speed(iter/s)": 0.027047 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 208.75, "completions/mean_length": 89.47916984558105, "completions/min_length": 40.25, "epoch": 8.352941176470589, "grad_norm": 0.0048439654709917365, "kl": 0.28125, "learning_rate": 6.675099741039909e-08, "loss": 0.000280864245723933, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5605, "train_speed(iter/s)": 0.027047 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 229.25, "completions/mean_length": 96.61458587646484, "completions/min_length": 44.75, "epoch": 8.354430379746836, "grad_norm": 0.0026109185195538217, "kl": 0.270751953125, "learning_rate": 6.663299243725512e-08, "loss": 0.00027062796289101243, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5606, "train_speed(iter/s)": 0.027049 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 264.25, "completions/mean_length": 100.42708587646484, "completions/min_length": 40.75, "epoch": 8.355919583023082, "grad_norm": 0.002766609873427963, "kl": 0.2685546875, "learning_rate": 6.651508441634946e-08, "loss": 0.0002680864417925477, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5607, "train_speed(iter/s)": 0.027046 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 319.25, "completions/mean_length": 100.95833396911621, "completions/min_length": 40.0, "epoch": 8.35740878629933, "grad_norm": 0.015053766568442834, "kl": 0.26953125, "learning_rate": 6.639727337406026e-08, "loss": 0.0002698446623980999, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5608, "train_speed(iter/s)": 0.027045 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.75, "completions/mean_length": 99.09375381469727, "completions/min_length": 40.75, "epoch": 8.358897989575578, "grad_norm": 0.002831493477820144, "kl": 0.278564453125, "learning_rate": 6.627955933674411e-08, "loss": 0.0002785640535876155, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5609, "train_speed(iter/s)": 0.027044 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 252.25, "completions/mean_length": 105.67708396911621, "completions/min_length": 44.5, "epoch": 8.360387192851825, "grad_norm": 0.0029026024013914247, "kl": 0.26416015625, "learning_rate": 6.616194233073591e-08, "loss": 0.0002643200277816504, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5610, "train_speed(iter/s)": 0.027045 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.75, "completions/mean_length": 91.33333396911621, "completions/min_length": 36.5, "epoch": 8.361876396128071, "grad_norm": 0.002914022809686386, "kl": 0.286865234375, "learning_rate": 6.604442238234858e-08, "loss": 0.00028641853714361787, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5611, "train_speed(iter/s)": 0.027044 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 275.75, "completions/mean_length": 100.81250190734863, "completions/min_length": 38.75, "epoch": 8.363365599404318, "grad_norm": 0.0029583385986323525, "kl": 0.269287109375, "learning_rate": 6.592699951787362e-08, "loss": 0.0002691585978027433, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5612, "train_speed(iter/s)": 0.027041 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 179.75, "completions/mean_length": 82.20833778381348, "completions/min_length": 41.5, "epoch": 8.364854802680567, "grad_norm": 0.0029589284548161158, "kl": 0.3037109375, "learning_rate": 6.58096737635806e-08, "loss": 0.00030362396501004696, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5613, "train_speed(iter/s)": 0.027043 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 244.25, "completions/mean_length": 111.81250190734863, "completions/min_length": 40.25, "epoch": 8.366344005956813, "grad_norm": 1.5918986766714966, "kl": 0.249755859375, "learning_rate": 6.569244514571775e-08, "loss": 0.007134604267776012, "memory(GiB)": 112.53, "reward": 1.4687500298023224, "reward_std": 0.07400632463395596, "rewards/CineAccuracyORM/mean": 0.4687500149011612, "rewards/CineAccuracyORM/std": 0.3641507476568222, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5614, "train_speed(iter/s)": 0.027043 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 246.5, "completions/mean_length": 93.26041793823242, "completions/min_length": 40.25, "epoch": 8.36783320923306, "grad_norm": 0.002758143140048285, "kl": 0.26953125, "learning_rate": 6.55753136905109e-08, "loss": 0.0002690973924472928, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5615, "train_speed(iter/s)": 0.027042 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 252.0, "completions/mean_length": 93.87500190734863, "completions/min_length": 37.75, "epoch": 8.369322412509307, "grad_norm": 0.0029598552334405203, "kl": 0.28564453125, "learning_rate": 6.545827942416477e-08, "loss": 0.000285507645457983, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5616, "train_speed(iter/s)": 0.027042 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 206.0, "completions/mean_length": 97.78125190734863, "completions/min_length": 43.0, "epoch": 8.370811615785554, "grad_norm": 0.0028352231609814226, "kl": 0.27685546875, "learning_rate": 6.534134237286204e-08, "loss": 0.0002768249833025038, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5617, "train_speed(iter/s)": 0.027042 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 246.75, "completions/mean_length": 110.59375381469727, "completions/min_length": 42.75, "epoch": 8.372300819061802, "grad_norm": 0.008043135523104523, "kl": 0.263427734375, "learning_rate": 6.522450256276363e-08, "loss": 0.0002629276132211089, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5618, "train_speed(iter/s)": 0.027039 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.25, "completions/mean_length": 87.83333587646484, "completions/min_length": 38.75, "epoch": 8.37379002233805, "grad_norm": 1.2157909298179201, "kl": 0.306640625, "learning_rate": 6.5107760020009e-08, "loss": 0.006529897451400757, "memory(GiB)": 112.53, "reward": 1.6458333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6458333507180214, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5619, "train_speed(iter/s)": 0.027039 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 258.5, "completions/mean_length": 101.45833587646484, "completions/min_length": 42.5, "epoch": 8.375279225614296, "grad_norm": 0.0027152763083074334, "kl": 0.262939453125, "learning_rate": 6.499111477071528e-08, "loss": 0.000263221503701061, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5620, "train_speed(iter/s)": 0.02704 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 250.25, "completions/mean_length": 91.83333587646484, "completions/min_length": 36.0, "epoch": 8.376768428890543, "grad_norm": 2.1230840569347067, "kl": 0.46533203125, "learning_rate": 6.487456684097848e-08, "loss": -0.02446458488702774, "memory(GiB)": 112.53, "reward": 1.9479166865348816, "reward_std": 0.07400631904602051, "rewards/CineAccuracyORM/mean": 0.9479166716337204, "rewards/CineAccuracyORM/std": 0.1037127822637558, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5621, "train_speed(iter/s)": 0.02704 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.0, "completions/mean_length": 97.43750381469727, "completions/min_length": 40.5, "epoch": 8.378257632166791, "grad_norm": 0.0022960441069361007, "kl": 0.254638671875, "learning_rate": 6.475811625687239e-08, "loss": 0.0002547081676311791, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5622, "train_speed(iter/s)": 0.02704 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.25, "completions/mean_length": 98.51041984558105, "completions/min_length": 47.0, "epoch": 8.379746835443038, "grad_norm": 0.002347802901820915, "kl": 0.251953125, "learning_rate": 6.464176304444908e-08, "loss": 0.00025242185802198946, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5623, "train_speed(iter/s)": 0.027039 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 266.75, "completions/mean_length": 93.42708396911621, "completions/min_length": 33.0, "epoch": 8.381236038719285, "grad_norm": 0.002228236580528527, "kl": 0.262451171875, "learning_rate": 6.452550722973927e-08, "loss": 0.00026245316257700324, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5624, "train_speed(iter/s)": 0.027038 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.5, "completions/mean_length": 92.97916984558105, "completions/min_length": 43.0, "epoch": 8.382725241995532, "grad_norm": 1.2434152513891485, "kl": 0.275634765625, "learning_rate": 6.440934883875104e-08, "loss": 0.0077833142131567, "memory(GiB)": 112.53, "reward": 1.7291667461395264, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7291666865348816, "rewards/CineAccuracyORM/std": 0.43174003064632416, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5625, "train_speed(iter/s)": 0.027038 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 270.25, "completions/mean_length": 96.07291984558105, "completions/min_length": 44.25, "epoch": 8.38421444527178, "grad_norm": 0.002318700702648462, "kl": 0.270263671875, "learning_rate": 6.429328789747163e-08, "loss": 0.0002704016806092113, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5626, "train_speed(iter/s)": 0.027037 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 225.0, "completions/mean_length": 101.59375381469727, "completions/min_length": 41.25, "epoch": 8.385703648548027, "grad_norm": 0.002295555728921549, "kl": 0.254638671875, "learning_rate": 6.417732443186575e-08, "loss": 0.0002544111048337072, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5627, "train_speed(iter/s)": 0.027039 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 225.75, "completions/mean_length": 88.26041984558105, "completions/min_length": 41.5, "epoch": 8.387192851824274, "grad_norm": 0.004321386034526735, "kl": 0.30029296875, "learning_rate": 6.406145846787669e-08, "loss": 0.0003001485310960561, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5628, "train_speed(iter/s)": 0.027038 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.5, "completions/mean_length": 85.60416984558105, "completions/min_length": 36.5, "epoch": 8.38868205510052, "grad_norm": 0.0025811928852692606, "kl": 0.29638671875, "learning_rate": 6.39456900314258e-08, "loss": 0.00029647329938597977, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5629, "train_speed(iter/s)": 0.027039 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.5, "completions/mean_length": 87.80208587646484, "completions/min_length": 41.75, "epoch": 8.390171258376768, "grad_norm": 1.4242787332327664, "kl": 0.3095703125, "learning_rate": 6.383001914841252e-08, "loss": -0.007364384364336729, "memory(GiB)": 112.53, "reward": 1.5625000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.5625000223517418, "rewards/CineAccuracyORM/std": 0.4870600998401642, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5630, "train_speed(iter/s)": 0.027038 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 236.5, "completions/mean_length": 87.97916984558105, "completions/min_length": 26.5, "epoch": 8.391660461653016, "grad_norm": 0.004636016792101642, "kl": 0.2919921875, "learning_rate": 6.371444584471475e-08, "loss": 0.0002922583371400833, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5631, "train_speed(iter/s)": 0.02704 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 223.0, "completions/mean_length": 94.44791793823242, "completions/min_length": 35.5, "epoch": 8.393149664929263, "grad_norm": 0.0026036308245142546, "kl": 0.279296875, "learning_rate": 6.359897014618831e-08, "loss": 0.0002795537584461272, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5632, "train_speed(iter/s)": 0.027041 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 186.75, "completions/mean_length": 86.23958396911621, "completions/min_length": 38.0, "epoch": 8.39463886820551, "grad_norm": 0.0026238022056224615, "kl": 0.296875, "learning_rate": 6.348359207866721e-08, "loss": 0.0002964492596220225, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5633, "train_speed(iter/s)": 0.027043 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 243.25, "completions/mean_length": 86.44791793823242, "completions/min_length": 38.25, "epoch": 8.396128071481757, "grad_norm": 0.0028234303061909238, "kl": 0.291015625, "learning_rate": 6.336831166796374e-08, "loss": 0.0002914954093284905, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5634, "train_speed(iter/s)": 0.027044 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 255.0, "completions/mean_length": 101.64583587646484, "completions/min_length": 38.75, "epoch": 8.397617274758005, "grad_norm": 0.0022927313993254296, "kl": 0.24951171875, "learning_rate": 6.325312893986812e-08, "loss": 0.00024965842021629214, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5635, "train_speed(iter/s)": 0.027043 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.5, "completions/mean_length": 92.97916984558105, "completions/min_length": 41.75, "epoch": 8.399106478034252, "grad_norm": 0.002316508858976311, "kl": 0.28076171875, "learning_rate": 6.313804392014905e-08, "loss": 0.0002811538288369775, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5636, "train_speed(iter/s)": 0.027043 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 240.75, "completions/mean_length": 93.07291984558105, "completions/min_length": 43.5, "epoch": 8.400595681310499, "grad_norm": 1.4032531456211257, "kl": 0.2919921875, "learning_rate": 6.302305663455309e-08, "loss": -0.0013018613681197166, "memory(GiB)": 112.53, "reward": 1.7916666865348816, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.24807433784008026, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5637, "train_speed(iter/s)": 0.027045 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.75, "completions/mean_length": 92.73958587646484, "completions/min_length": 39.5, "epoch": 8.402084884586746, "grad_norm": 0.0026775017091089422, "kl": 0.289794921875, "learning_rate": 6.290816710880508e-08, "loss": 0.0002892881748266518, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5638, "train_speed(iter/s)": 0.027046 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.5, "completions/mean_length": 87.95833396911621, "completions/min_length": 34.0, "epoch": 8.403574087862994, "grad_norm": 0.017965399923834217, "kl": 0.30859375, "learning_rate": 6.279337536860785e-08, "loss": 0.00030884551233612, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5639, "train_speed(iter/s)": 0.027048 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 240.5, "completions/mean_length": 107.53125190734863, "completions/min_length": 46.75, "epoch": 8.405063291139241, "grad_norm": 0.0022478261871756843, "kl": 0.252197265625, "learning_rate": 6.26786814396425e-08, "loss": 0.0002519471454434097, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5640, "train_speed(iter/s)": 0.027046 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 290.5, "completions/mean_length": 103.55208778381348, "completions/min_length": 41.5, "epoch": 8.406552494415488, "grad_norm": 0.0028098319233248845, "kl": 0.263427734375, "learning_rate": 6.256408534756813e-08, "loss": 0.0002635227283462882, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5641, "train_speed(iter/s)": 0.027046 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 231.75, "completions/mean_length": 94.55208778381348, "completions/min_length": 37.75, "epoch": 8.408041697691734, "grad_norm": 0.002335311360327885, "kl": 0.259521484375, "learning_rate": 6.244958711802212e-08, "loss": 0.0002595692640170455, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5642, "train_speed(iter/s)": 0.027046 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.75, "completions/mean_length": 93.14583587646484, "completions/min_length": 42.25, "epoch": 8.409530900967981, "grad_norm": 0.002764099150031541, "kl": 0.27978515625, "learning_rate": 6.233518677661981e-08, "loss": 0.0002798411005642265, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5643, "train_speed(iter/s)": 0.027046 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.5, "completions/mean_length": 92.92708778381348, "completions/min_length": 30.0, "epoch": 8.41102010424423, "grad_norm": 0.00226148215948513, "kl": 0.281005859375, "learning_rate": 6.222088434895461e-08, "loss": 0.000281290034763515, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5644, "train_speed(iter/s)": 0.027047 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 263.25, "completions/mean_length": 101.48958587646484, "completions/min_length": 36.5, "epoch": 8.412509307520477, "grad_norm": 0.003780129908656686, "kl": 0.265625, "learning_rate": 6.21066798605982e-08, "loss": 0.00026642493321560323, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5645, "train_speed(iter/s)": 0.027044 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.75, "completions/mean_length": 94.73958396911621, "completions/min_length": 46.0, "epoch": 8.413998510796723, "grad_norm": 0.002508305514735796, "kl": 0.30029296875, "learning_rate": 6.19925733371e-08, "loss": 0.00029989410541020334, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5646, "train_speed(iter/s)": 0.027042 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 233.0, "completions/mean_length": 94.11458587646484, "completions/min_length": 42.5, "epoch": 8.41548771407297, "grad_norm": 0.0025147134558635036, "kl": 0.28857421875, "learning_rate": 6.187856480398812e-08, "loss": 0.0002889110764954239, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5647, "train_speed(iter/s)": 0.027041 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 233.5, "completions/mean_length": 99.03125190734863, "completions/min_length": 33.75, "epoch": 8.416976917349219, "grad_norm": 0.0022741442405980927, "kl": 0.25732421875, "learning_rate": 6.176465428676819e-08, "loss": 0.00025766732869669795, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5648, "train_speed(iter/s)": 0.027039 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.75, "completions/mean_length": 102.33333587646484, "completions/min_length": 41.75, "epoch": 8.418466120625466, "grad_norm": 0.011770291790701777, "kl": 0.27734375, "learning_rate": 6.165084181092406e-08, "loss": 0.00027751445304602385, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5649, "train_speed(iter/s)": 0.027039 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 244.25, "completions/mean_length": 99.15625381469727, "completions/min_length": 41.75, "epoch": 8.419955323901712, "grad_norm": 0.006306169749918135, "kl": 0.26318359375, "learning_rate": 6.15371274019178e-08, "loss": 0.00026331702247262, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5650, "train_speed(iter/s)": 0.027038 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 283.75, "completions/mean_length": 99.75000190734863, "completions/min_length": 45.75, "epoch": 8.42144452717796, "grad_norm": 0.0028587649439280644, "kl": 0.275634765625, "learning_rate": 6.142351108518929e-08, "loss": 0.00027581886388361454, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5651, "train_speed(iter/s)": 0.027038 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.75, "completions/mean_length": 97.64583587646484, "completions/min_length": 33.75, "epoch": 8.422933730454208, "grad_norm": 0.002714585293954374, "kl": 0.273681640625, "learning_rate": 6.13099928861569e-08, "loss": 0.00027417694218456745, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5652, "train_speed(iter/s)": 0.02704 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.5, "completions/mean_length": 91.06250381469727, "completions/min_length": 43.25, "epoch": 8.424422933730455, "grad_norm": 0.002595387057856807, "kl": 0.2666015625, "learning_rate": 6.119657283021634e-08, "loss": 0.00026588336913846433, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5653, "train_speed(iter/s)": 0.02704 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.25, "completions/mean_length": 101.75000381469727, "completions/min_length": 43.25, "epoch": 8.425912137006701, "grad_norm": 0.7324050186790246, "kl": 0.282470703125, "learning_rate": 6.108325094274208e-08, "loss": -0.0004198807873763144, "memory(GiB)": 112.53, "reward": 1.760416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166865348816, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5654, "train_speed(iter/s)": 0.02704 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 240.5, "completions/mean_length": 94.86458587646484, "completions/min_length": 38.0, "epoch": 8.427401340282948, "grad_norm": 0.0025019173323749325, "kl": 0.24560546875, "learning_rate": 6.097002724908623e-08, "loss": 0.000245528673985973, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5655, "train_speed(iter/s)": 0.02704 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 261.75, "completions/mean_length": 99.68750381469727, "completions/min_length": 38.25, "epoch": 8.428890543559195, "grad_norm": 0.00242085706545927, "kl": 0.240478515625, "learning_rate": 6.085690177457903e-08, "loss": 0.0002408615982858464, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5656, "train_speed(iter/s)": 0.027038 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 283.25, "completions/mean_length": 100.65625190734863, "completions/min_length": 42.5, "epoch": 8.430379746835444, "grad_norm": 0.002674647817078093, "kl": 0.2744140625, "learning_rate": 6.074387454452889e-08, "loss": 0.0002749438863247633, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5657, "train_speed(iter/s)": 0.027038 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.0, "completions/mean_length": 90.35416984558105, "completions/min_length": 40.0, "epoch": 8.43186895011169, "grad_norm": 0.0025887746166718705, "kl": 0.3037109375, "learning_rate": 6.063094558422177e-08, "loss": 0.00030269406852312386, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5658, "train_speed(iter/s)": 0.027037 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.0, "completions/mean_length": 96.33333587646484, "completions/min_length": 45.25, "epoch": 8.433358153387937, "grad_norm": 0.0026028553513054832, "kl": 0.27490234375, "learning_rate": 6.051811491892233e-08, "loss": 0.000274914171313867, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5659, "train_speed(iter/s)": 0.027035 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 246.75, "completions/mean_length": 101.26041984558105, "completions/min_length": 38.25, "epoch": 8.434847356664184, "grad_norm": 0.0024368045527108804, "kl": 0.251708984375, "learning_rate": 6.040538257387267e-08, "loss": 0.00025190840824507177, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5660, "train_speed(iter/s)": 0.027035 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 256.25, "completions/mean_length": 101.15625381469727, "completions/min_length": 44.75, "epoch": 8.436336559940433, "grad_norm": 0.0023484760433391764, "kl": 0.26416015625, "learning_rate": 6.029274857429311e-08, "loss": 0.0002639793383423239, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5661, "train_speed(iter/s)": 0.027035 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 270.0, "completions/mean_length": 98.50000381469727, "completions/min_length": 38.0, "epoch": 8.43782576321668, "grad_norm": 0.0025588553164343883, "kl": 0.278076171875, "learning_rate": 6.018021294538217e-08, "loss": 0.00027799533563666046, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5662, "train_speed(iter/s)": 0.027034 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 208.25, "completions/mean_length": 90.46875381469727, "completions/min_length": 37.25, "epoch": 8.439314966492926, "grad_norm": 0.002539941817993349, "kl": 0.2841796875, "learning_rate": 6.006777571231586e-08, "loss": 0.00028349272906780243, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5663, "train_speed(iter/s)": 0.027035 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.5, "completions/mean_length": 91.82291793823242, "completions/min_length": 37.25, "epoch": 8.440804169769173, "grad_norm": 0.002565255916509391, "kl": 0.2744140625, "learning_rate": 5.995543690024879e-08, "loss": 0.00027451079222373664, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5664, "train_speed(iter/s)": 0.027035 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.0, "completions/mean_length": 94.72916984558105, "completions/min_length": 41.5, "epoch": 8.442293373045421, "grad_norm": 1.7461529974503043, "kl": 0.2822265625, "learning_rate": 5.984319653431285e-08, "loss": -0.0013786115450784564, "memory(GiB)": 112.53, "reward": 1.6875000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000149011612, "rewards/CineAccuracyORM/std": 0.4717390313744545, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5665, "train_speed(iter/s)": 0.027034 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 260.0, "completions/mean_length": 98.68750190734863, "completions/min_length": 38.0, "epoch": 8.443782576321668, "grad_norm": 0.002565491824071804, "kl": 0.26318359375, "learning_rate": 5.973105463961864e-08, "loss": 0.00026351321139372885, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5666, "train_speed(iter/s)": 0.027035 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.5, "completions/mean_length": 85.85416793823242, "completions/min_length": 39.5, "epoch": 8.445271779597915, "grad_norm": 0.7388377255048459, "kl": 0.5146484375, "learning_rate": 5.961901124125424e-08, "loss": -0.004109411966055632, "memory(GiB)": 112.53, "reward": 1.7395834028720856, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.739583358168602, "rewards/CineAccuracyORM/std": 0.4121886007487774, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5667, "train_speed(iter/s)": 0.027035 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 181.25, "completions/mean_length": 95.87500190734863, "completions/min_length": 43.25, "epoch": 8.446760982874162, "grad_norm": 0.002711614587221828, "kl": 0.27490234375, "learning_rate": 5.950706636428576e-08, "loss": 0.00027475625392980874, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5668, "train_speed(iter/s)": 0.027036 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 251.75, "completions/mean_length": 107.83333587646484, "completions/min_length": 49.25, "epoch": 8.448250186150409, "grad_norm": 0.0023540063019952106, "kl": 0.2470703125, "learning_rate": 5.939522003375752e-08, "loss": 0.00024715502513572574, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5669, "train_speed(iter/s)": 0.027034 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 265.25, "completions/mean_length": 96.03125190734863, "completions/min_length": 41.75, "epoch": 8.449739389426657, "grad_norm": 0.002240653469588878, "kl": 0.26806640625, "learning_rate": 5.928347227469155e-08, "loss": 0.00026805876404978335, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5670, "train_speed(iter/s)": 0.027032 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.75, "completions/mean_length": 95.44791793823242, "completions/min_length": 37.5, "epoch": 8.451228592702904, "grad_norm": 0.0022157809123036095, "kl": 0.27001953125, "learning_rate": 5.9171823112087845e-08, "loss": 0.0002701916382648051, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5671, "train_speed(iter/s)": 0.027031 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 241.75, "completions/mean_length": 94.20833587646484, "completions/min_length": 36.0, "epoch": 8.45271779597915, "grad_norm": 0.0034605124595143244, "kl": 0.2890625, "learning_rate": 5.906027257092444e-08, "loss": 0.000289129966404289, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5672, "train_speed(iter/s)": 0.027032 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 237.0, "completions/mean_length": 89.07291984558105, "completions/min_length": 30.75, "epoch": 8.454206999255398, "grad_norm": 0.004716919669144221, "kl": 0.27734375, "learning_rate": 5.894882067615714e-08, "loss": 0.0002773040614556521, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5673, "train_speed(iter/s)": 0.027033 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.75, "completions/mean_length": 99.22916793823242, "completions/min_length": 46.25, "epoch": 8.455696202531646, "grad_norm": 0.002496293053457997, "kl": 0.254638671875, "learning_rate": 5.8837467452720005e-08, "loss": 0.00025469574029557407, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5674, "train_speed(iter/s)": 0.027031 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 253.75, "completions/mean_length": 85.40625381469727, "completions/min_length": 26.75, "epoch": 8.457185405807893, "grad_norm": 0.002645829995846963, "kl": 0.302734375, "learning_rate": 5.8726212925524765e-08, "loss": 0.000302218075376004, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5675, "train_speed(iter/s)": 0.027029 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 278.5, "completions/mean_length": 99.50000381469727, "completions/min_length": 44.75, "epoch": 8.45867460908414, "grad_norm": 0.002323344555500581, "kl": 0.2587890625, "learning_rate": 5.861505711946102e-08, "loss": 0.000259312626440078, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5676, "train_speed(iter/s)": 0.027029 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 259.75, "completions/mean_length": 103.97917175292969, "completions/min_length": 48.25, "epoch": 8.460163812360387, "grad_norm": 0.002523205578468501, "kl": 0.269775390625, "learning_rate": 5.8504000059396484e-08, "loss": 0.0002693596761673689, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5677, "train_speed(iter/s)": 0.027029 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 294.75, "completions/mean_length": 108.09375190734863, "completions/min_length": 52.25, "epoch": 8.461653015636635, "grad_norm": 0.0023971114194489577, "kl": 0.234375, "learning_rate": 5.839304177017662e-08, "loss": 0.00023465740378014743, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5678, "train_speed(iter/s)": 0.02703 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 255.0, "completions/mean_length": 97.03125190734863, "completions/min_length": 46.5, "epoch": 8.463142218912882, "grad_norm": 0.002543795618012354, "kl": 0.27197265625, "learning_rate": 5.828218227662479e-08, "loss": 0.00027121457969769835, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5679, "train_speed(iter/s)": 0.027029 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 235.75, "completions/mean_length": 106.31250190734863, "completions/min_length": 45.25, "epoch": 8.464631422189129, "grad_norm": 0.0035387017224353125, "kl": 0.250732421875, "learning_rate": 5.817142160354249e-08, "loss": 0.0002503266732674092, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5680, "train_speed(iter/s)": 0.027028 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.25, "completions/mean_length": 97.06250190734863, "completions/min_length": 37.5, "epoch": 8.466120625465376, "grad_norm": 0.00230643739410554, "kl": 0.26171875, "learning_rate": 5.806075977570885e-08, "loss": 0.0002620797313284129, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5681, "train_speed(iter/s)": 0.027027 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 311.0, "completions/mean_length": 96.23958778381348, "completions/min_length": 37.75, "epoch": 8.467609828741622, "grad_norm": 0.002554036176265073, "kl": 0.27880859375, "learning_rate": 5.7950196817880994e-08, "loss": 0.0002789658901747316, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5682, "train_speed(iter/s)": 0.027027 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 260.25, "completions/mean_length": 112.08333587646484, "completions/min_length": 43.25, "epoch": 8.469099032017871, "grad_norm": 0.0024279256388406232, "kl": 0.246337890625, "learning_rate": 5.7839732754793866e-08, "loss": 0.0002458056842442602, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5683, "train_speed(iter/s)": 0.027026 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.5, "completions/mean_length": 88.14583587646484, "completions/min_length": 34.0, "epoch": 8.470588235294118, "grad_norm": 0.003157423342352387, "kl": 0.2783203125, "learning_rate": 5.772936761116026e-08, "loss": 0.0002785134711302817, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5684, "train_speed(iter/s)": 0.027027 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.25, "completions/mean_length": 97.90625190734863, "completions/min_length": 37.5, "epoch": 8.472077438570365, "grad_norm": 0.0027131564017782153, "kl": 0.28857421875, "learning_rate": 5.761910141167109e-08, "loss": 0.00028818496502935886, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5685, "train_speed(iter/s)": 0.027025 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 246.25, "completions/mean_length": 98.51041793823242, "completions/min_length": 34.25, "epoch": 8.473566641846611, "grad_norm": 0.002245923257457985, "kl": 0.26318359375, "learning_rate": 5.750893418099484e-08, "loss": 0.0002630891976878047, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5686, "train_speed(iter/s)": 0.027025 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.25, "completions/mean_length": 84.82291984558105, "completions/min_length": 33.75, "epoch": 8.47505584512286, "grad_norm": 0.0034419916808653304, "kl": 0.266845703125, "learning_rate": 5.7398865943778016e-08, "loss": 0.00026612437795847654, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5687, "train_speed(iter/s)": 0.027025 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 255.75, "completions/mean_length": 97.55208587646484, "completions/min_length": 39.0, "epoch": 8.476545048399107, "grad_norm": 0.0023413261222496266, "kl": 0.25927734375, "learning_rate": 5.7288896724644876e-08, "loss": 0.00025901428307406604, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5688, "train_speed(iter/s)": 0.027026 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 232.0, "completions/mean_length": 102.08333587646484, "completions/min_length": 42.75, "epoch": 8.478034251675354, "grad_norm": 0.0025323334688550927, "kl": 0.260986328125, "learning_rate": 5.7179026548197454e-08, "loss": 0.00026088341837748885, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5689, "train_speed(iter/s)": 0.027026 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.25, "completions/mean_length": 94.08333396911621, "completions/min_length": 40.75, "epoch": 8.4795234549516, "grad_norm": 0.002522836438690825, "kl": 0.28857421875, "learning_rate": 5.706925543901608e-08, "loss": 0.000287902366835624, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5690, "train_speed(iter/s)": 0.027026 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 283.75, "completions/mean_length": 102.14583587646484, "completions/min_length": 36.5, "epoch": 8.481012658227849, "grad_norm": 0.002642333399584056, "kl": 0.276611328125, "learning_rate": 5.695958342165819e-08, "loss": 0.0002764585369732231, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5691, "train_speed(iter/s)": 0.027025 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.0, "completions/mean_length": 100.98958778381348, "completions/min_length": 50.5, "epoch": 8.482501861504096, "grad_norm": 0.0024487480426194554, "kl": 0.25341796875, "learning_rate": 5.6850010520659744e-08, "loss": 0.00025367707712575793, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5692, "train_speed(iter/s)": 0.027025 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 244.0, "completions/mean_length": 102.83333587646484, "completions/min_length": 40.75, "epoch": 8.483991064780342, "grad_norm": 0.024067999049836034, "kl": 0.258544921875, "learning_rate": 5.6740536760534144e-08, "loss": 0.00025823526084423065, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5693, "train_speed(iter/s)": 0.027026 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 316.25, "completions/mean_length": 112.95833587646484, "completions/min_length": 44.75, "epoch": 8.48548026805659, "grad_norm": 0.00814876071957666, "kl": 0.263916015625, "learning_rate": 5.663116216577257e-08, "loss": 0.00026381516363471746, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5694, "train_speed(iter/s)": 0.027024 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.0, "completions/mean_length": 101.29166793823242, "completions/min_length": 41.75, "epoch": 8.486969471332836, "grad_norm": 0.002500959907788293, "kl": 0.252685546875, "learning_rate": 5.6521886760844465e-08, "loss": 0.0002528618788346648, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5695, "train_speed(iter/s)": 0.027025 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 232.75, "completions/mean_length": 102.79166984558105, "completions/min_length": 48.0, "epoch": 8.488458674609085, "grad_norm": 1.106792738255348, "kl": 0.26904296875, "learning_rate": 5.6412710570196366e-08, "loss": -0.006390630267560482, "memory(GiB)": 112.53, "reward": 1.8125000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000074505806, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5696, "train_speed(iter/s)": 0.027024 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.75, "completions/mean_length": 87.93750190734863, "completions/min_length": 30.25, "epoch": 8.489947877885331, "grad_norm": 0.00254761233953106, "kl": 0.27392578125, "learning_rate": 5.63036336182533e-08, "loss": 0.0002739282208494842, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5697, "train_speed(iter/s)": 0.027023 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 248.75, "completions/mean_length": 99.02083778381348, "completions/min_length": 44.0, "epoch": 8.491437081161578, "grad_norm": 0.7642575100035667, "kl": 0.264404296875, "learning_rate": 5.619465592941775e-08, "loss": 0.024077169597148895, "memory(GiB)": 112.53, "reward": 1.6875000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000149011612, "rewards/CineAccuracyORM/std": 0.3666742444038391, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5698, "train_speed(iter/s)": 0.027024 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.25, "completions/mean_length": 84.06250190734863, "completions/min_length": 35.5, "epoch": 8.492926284437825, "grad_norm": 2.0183984749974813, "kl": 0.317626953125, "learning_rate": 5.608577752806987e-08, "loss": -0.020237447693943977, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5699, "train_speed(iter/s)": 0.027024 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.25, "completions/mean_length": 105.57291793823242, "completions/min_length": 38.25, "epoch": 8.494415487714074, "grad_norm": 0.002290701984338583, "kl": 0.26123046875, "learning_rate": 5.597699843856812e-08, "loss": 0.0002609323419164866, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5700, "train_speed(iter/s)": 0.027023 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.0, "completions/mean_length": 100.63541984558105, "completions/min_length": 41.0, "epoch": 8.49590469099032, "grad_norm": 0.004430663004876917, "kl": 0.273193359375, "learning_rate": 5.586831868524805e-08, "loss": 0.00027306703850626945, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5701, "train_speed(iter/s)": 0.027024 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 194.25, "completions/mean_length": 89.54166984558105, "completions/min_length": 39.75, "epoch": 8.497393894266567, "grad_norm": 0.0026171101333132916, "kl": 0.27294921875, "learning_rate": 5.575973829242364e-08, "loss": 0.000272608594968915, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5702, "train_speed(iter/s)": 0.027024 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 398.0, "completions/mean_length": 107.12500381469727, "completions/min_length": 46.25, "epoch": 8.498883097542814, "grad_norm": 0.867898019498315, "kl": 0.41845703125, "learning_rate": 5.5651257284385964e-08, "loss": 0.05659586191177368, "memory(GiB)": 112.53, "reward": 1.8125000596046448, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 0.9895833432674408, "rewards/Format/std": 0.05103103443980217, "step": 5703, "train_speed(iter/s)": 0.027024 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 253.5, "completions/mean_length": 94.23958587646484, "completions/min_length": 42.25, "epoch": 8.500372300819063, "grad_norm": 0.0033153785414028864, "kl": 0.28955078125, "learning_rate": 5.554287568540461e-08, "loss": 0.0002891436161007732, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5704, "train_speed(iter/s)": 0.027023 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.75, "completions/mean_length": 91.69791793823242, "completions/min_length": 40.75, "epoch": 8.50186150409531, "grad_norm": 0.002296245840160453, "kl": 0.28076171875, "learning_rate": 5.5434593519726346e-08, "loss": 0.0002808170102071017, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5705, "train_speed(iter/s)": 0.027022 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 287.75, "completions/mean_length": 108.64583587646484, "completions/min_length": 35.0, "epoch": 8.503350707371556, "grad_norm": 0.0021664117527977547, "kl": 0.252197265625, "learning_rate": 5.532641081157591e-08, "loss": 0.00025218582595698535, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5706, "train_speed(iter/s)": 0.027024 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.5, "completions/mean_length": 98.14583396911621, "completions/min_length": 35.75, "epoch": 8.504839910647803, "grad_norm": 1.3003803398928786, "kl": 0.2548828125, "learning_rate": 5.521832758515599e-08, "loss": 0.017448164522647858, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.7395833386108279, "rewards/CineAccuracyORM/std": 0.12161349877715111, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5707, "train_speed(iter/s)": 0.027025 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 259.0, "completions/mean_length": 107.20833778381348, "completions/min_length": 43.75, "epoch": 8.50632911392405, "grad_norm": 0.00946209391963651, "kl": 0.256103515625, "learning_rate": 5.511034386464641e-08, "loss": 0.00025556934997439384, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5708, "train_speed(iter/s)": 0.027025 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.75, "completions/mean_length": 92.15625, "completions/min_length": 40.5, "epoch": 8.507818317200298, "grad_norm": 0.0024678034808080854, "kl": 0.27978515625, "learning_rate": 5.5002459674205525e-08, "loss": 0.0002796097251120955, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5709, "train_speed(iter/s)": 0.027025 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 225.5, "completions/mean_length": 90.88541793823242, "completions/min_length": 42.5, "epoch": 8.509307520476545, "grad_norm": 0.002485076142787609, "kl": 0.27587890625, "learning_rate": 5.4894675037968796e-08, "loss": 0.0002762621152214706, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5710, "train_speed(iter/s)": 0.027026 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.5, "completions/mean_length": 94.73958396911621, "completions/min_length": 46.25, "epoch": 8.510796723752792, "grad_norm": 1.4067993900662419, "kl": 0.292236328125, "learning_rate": 5.478698998004966e-08, "loss": 0.005735835060477257, "memory(GiB)": 112.53, "reward": 1.635416716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.635416679084301, "rewards/CineAccuracyORM/std": 0.4456155598163605, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5711, "train_speed(iter/s)": 0.027024 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 292.5, "completions/mean_length": 110.67708587646484, "completions/min_length": 39.25, "epoch": 8.512285927029039, "grad_norm": 0.0021502854664918226, "kl": 0.2421875, "learning_rate": 5.4679404524539505e-08, "loss": 0.00024148070951923728, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5712, "train_speed(iter/s)": 0.027023 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 297.5, "completions/mean_length": 106.66666793823242, "completions/min_length": 47.25, "epoch": 8.513775130305287, "grad_norm": 0.8920798426836528, "kl": 0.251708984375, "learning_rate": 5.457191869550687e-08, "loss": 0.01494413148611784, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5713, "train_speed(iter/s)": 0.027022 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.75, "completions/mean_length": 106.66666984558105, "completions/min_length": 46.5, "epoch": 8.515264333581534, "grad_norm": 0.002468259513508114, "kl": 0.27392578125, "learning_rate": 5.4464532516998505e-08, "loss": 0.0002742705983109772, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5714, "train_speed(iter/s)": 0.027022 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 196.0, "completions/mean_length": 98.31250190734863, "completions/min_length": 37.0, "epoch": 8.516753536857781, "grad_norm": 0.7561402556142754, "kl": 0.25927734375, "learning_rate": 5.435724601303865e-08, "loss": 0.017280440777540207, "memory(GiB)": 112.53, "reward": 1.6562500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500149011612, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5715, "train_speed(iter/s)": 0.027021 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 243.25, "completions/mean_length": 96.77083396911621, "completions/min_length": 35.75, "epoch": 8.518242740134028, "grad_norm": 1.0642906857564063, "kl": 0.30078125, "learning_rate": 5.425005920762932e-08, "loss": 0.013078145682811737, "memory(GiB)": 112.53, "reward": 1.8645833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8645833432674408, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5716, "train_speed(iter/s)": 0.027018 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.75, "completions/mean_length": 87.82291984558105, "completions/min_length": 38.0, "epoch": 8.519731943410276, "grad_norm": 0.0039400131994398275, "kl": 0.281982421875, "learning_rate": 5.414297212475011e-08, "loss": 0.0002820480731315911, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5717, "train_speed(iter/s)": 0.02702 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 300.0, "completions/mean_length": 101.81250190734863, "completions/min_length": 31.75, "epoch": 8.521221146686523, "grad_norm": 0.005843297984361367, "kl": 0.2841796875, "learning_rate": 5.4035984788358356e-08, "loss": 0.00028428371297195554, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5718, "train_speed(iter/s)": 0.027017 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 272.25, "completions/mean_length": 103.12500190734863, "completions/min_length": 40.5, "epoch": 8.52271034996277, "grad_norm": 0.44561316257225236, "kl": 0.28369140625, "learning_rate": 5.392909722238931e-08, "loss": -0.018852826207876205, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5719, "train_speed(iter/s)": 0.027017 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 260.25, "completions/mean_length": 98.11458587646484, "completions/min_length": 39.25, "epoch": 8.524199553239017, "grad_norm": 0.009970172514914735, "kl": 0.259033203125, "learning_rate": 5.382230945075556e-08, "loss": 0.0002587377675808966, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5720, "train_speed(iter/s)": 0.027017 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 262.0, "completions/mean_length": 110.05208587646484, "completions/min_length": 44.75, "epoch": 8.525688756515265, "grad_norm": 0.002467210530759301, "kl": 0.25439453125, "learning_rate": 5.371562149734754e-08, "loss": 0.000254602957284078, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5721, "train_speed(iter/s)": 0.027018 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 253.75, "completions/mean_length": 101.26041793823242, "completions/min_length": 52.25, "epoch": 8.527177959791512, "grad_norm": 0.005238026556472229, "kl": 0.27978515625, "learning_rate": 5.360903338603323e-08, "loss": 0.0002799705252982676, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5722, "train_speed(iter/s)": 0.027017 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.25, "completions/mean_length": 99.32291984558105, "completions/min_length": 47.5, "epoch": 8.528667163067759, "grad_norm": 0.002658997260713597, "kl": 0.26416015625, "learning_rate": 5.350254514065855e-08, "loss": 0.0002638460136950016, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5723, "train_speed(iter/s)": 0.027017 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 290.75, "completions/mean_length": 109.43750190734863, "completions/min_length": 45.5, "epoch": 8.530156366344006, "grad_norm": 1.3518323449488803, "kl": 0.261474609375, "learning_rate": 5.339615678504677e-08, "loss": 0.013398092240095139, "memory(GiB)": 112.53, "reward": 1.8541666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.12590253353118896, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5724, "train_speed(iter/s)": 0.027018 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 266.0, "completions/mean_length": 103.21875190734863, "completions/min_length": 42.0, "epoch": 8.531645569620252, "grad_norm": 0.0026056405312966458, "kl": 0.261474609375, "learning_rate": 5.328986834299903e-08, "loss": 0.00026178875123150647, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5725, "train_speed(iter/s)": 0.027017 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.0, "completions/mean_length": 95.56250190734863, "completions/min_length": 41.75, "epoch": 8.533134772896501, "grad_norm": 0.002543634952862986, "kl": 0.2861328125, "learning_rate": 5.318367983829392e-08, "loss": 0.0002859523519873619, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5726, "train_speed(iter/s)": 0.027018 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 290.75, "completions/mean_length": 108.03125381469727, "completions/min_length": 44.75, "epoch": 8.534623976172748, "grad_norm": 0.002512333645503904, "kl": 0.2626953125, "learning_rate": 5.307759129468775e-08, "loss": 0.00026247333153150976, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5727, "train_speed(iter/s)": 0.027019 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.0, "completions/mean_length": 96.38541984558105, "completions/min_length": 44.25, "epoch": 8.536113179448995, "grad_norm": 0.0024843917846928193, "kl": 0.265380859375, "learning_rate": 5.297160273591472e-08, "loss": 0.0002648339723236859, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5728, "train_speed(iter/s)": 0.027018 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 220.5, "completions/mean_length": 100.27083587646484, "completions/min_length": 36.5, "epoch": 8.537602382725241, "grad_norm": 0.002815973770395346, "kl": 0.2568359375, "learning_rate": 5.286571418568614e-08, "loss": 0.0002570106298662722, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5729, "train_speed(iter/s)": 0.027017 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 261.25, "completions/mean_length": 102.84375, "completions/min_length": 43.25, "epoch": 8.53909158600149, "grad_norm": 0.003628115897302281, "kl": 0.265869140625, "learning_rate": 5.275992566769144e-08, "loss": 0.0002656746655702591, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5730, "train_speed(iter/s)": 0.027016 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 256.5, "completions/mean_length": 105.39583587646484, "completions/min_length": 44.0, "epoch": 8.540580789277737, "grad_norm": 0.0024966963263747207, "kl": 0.24609375, "learning_rate": 5.265423720559747e-08, "loss": 0.0002458853123243898, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5731, "train_speed(iter/s)": 0.027017 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.5, "completions/mean_length": 95.61458587646484, "completions/min_length": 41.75, "epoch": 8.542069992553984, "grad_norm": 0.542342337620222, "kl": 0.3359375, "learning_rate": 5.2548648823048544e-08, "loss": -0.0014259375166147947, "memory(GiB)": 112.53, "reward": 1.8958333432674408, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.12590253353118896, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5732, "train_speed(iter/s)": 0.027015 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 266.25, "completions/mean_length": 105.22916793823242, "completions/min_length": 38.5, "epoch": 8.54355919583023, "grad_norm": 0.002339526822429618, "kl": 0.2626953125, "learning_rate": 5.2443160543667045e-08, "loss": 0.00026260135928168893, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5733, "train_speed(iter/s)": 0.027015 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 267.5, "completions/mean_length": 99.85416793823242, "completions/min_length": 37.75, "epoch": 8.545048399106477, "grad_norm": 0.002352065003298435, "kl": 0.27294921875, "learning_rate": 5.233777239105225e-08, "loss": 0.00027256974135525525, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5734, "train_speed(iter/s)": 0.027015 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.25, "completions/mean_length": 91.50000286102295, "completions/min_length": 41.0, "epoch": 8.546537602382726, "grad_norm": 0.002885024937990056, "kl": 0.292724609375, "learning_rate": 5.223248438878175e-08, "loss": 0.00029259739676490426, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5735, "train_speed(iter/s)": 0.027016 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.75, "completions/mean_length": 101.42708587646484, "completions/min_length": 45.75, "epoch": 8.548026805658973, "grad_norm": 0.002427261495719854, "kl": 0.255859375, "learning_rate": 5.2127296560410374e-08, "loss": 0.0002553003141656518, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5736, "train_speed(iter/s)": 0.027017 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 297.0, "completions/mean_length": 110.34375381469727, "completions/min_length": 40.5, "epoch": 8.54951600893522, "grad_norm": 0.002600659163407584, "kl": 0.264404296875, "learning_rate": 5.202220892947051e-08, "loss": 0.00026399065973237157, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5737, "train_speed(iter/s)": 0.027017 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.5, "completions/mean_length": 110.61458396911621, "completions/min_length": 47.0, "epoch": 8.551005212211466, "grad_norm": 0.002501906545428362, "kl": 0.232421875, "learning_rate": 5.191722151947225e-08, "loss": 0.00023232598323374987, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5738, "train_speed(iter/s)": 0.027016 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.25, "completions/mean_length": 90.76041793823242, "completions/min_length": 32.25, "epoch": 8.552494415487715, "grad_norm": 0.002598089354162714, "kl": 0.292724609375, "learning_rate": 5.1812334353903145e-08, "loss": 0.0002922177081927657, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5739, "train_speed(iter/s)": 0.027016 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 181.5, "completions/mean_length": 87.16666984558105, "completions/min_length": 39.5, "epoch": 8.553983618763962, "grad_norm": 0.0027522433216891572, "kl": 0.287109375, "learning_rate": 5.170754745622863e-08, "loss": 0.00028719333931803703, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5740, "train_speed(iter/s)": 0.027018 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.25, "completions/mean_length": 99.20833587646484, "completions/min_length": 44.0, "epoch": 8.555472822040208, "grad_norm": 0.002538793442912756, "kl": 0.281982421875, "learning_rate": 5.1602860849891184e-08, "loss": 0.0002816772321239114, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5741, "train_speed(iter/s)": 0.027015 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 240.25, "completions/mean_length": 106.11458587646484, "completions/min_length": 40.25, "epoch": 8.556962025316455, "grad_norm": 0.002343348867392483, "kl": 0.2333984375, "learning_rate": 5.149827455831135e-08, "loss": 0.00023334333673119545, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5742, "train_speed(iter/s)": 0.027016 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.0, "completions/mean_length": 95.29166793823242, "completions/min_length": 39.75, "epoch": 8.558451228592704, "grad_norm": 0.0024957168530690054, "kl": 0.27001953125, "learning_rate": 5.139378860488691e-08, "loss": 0.00027000033878721297, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5743, "train_speed(iter/s)": 0.027017 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 250.5, "completions/mean_length": 94.18750381469727, "completions/min_length": 40.5, "epoch": 8.55994043186895, "grad_norm": 0.013347682853705738, "kl": 0.27880859375, "learning_rate": 5.1289403012993336e-08, "loss": 0.00027939752908423543, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5744, "train_speed(iter/s)": 0.027017 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 279.75, "completions/mean_length": 107.03125190734863, "completions/min_length": 34.25, "epoch": 8.561429635145197, "grad_norm": 0.006282425226139236, "kl": 0.249267578125, "learning_rate": 5.118511780598378e-08, "loss": 0.0002493747742846608, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5745, "train_speed(iter/s)": 0.027018 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 322.25, "completions/mean_length": 108.65625190734863, "completions/min_length": 36.75, "epoch": 8.562918838421444, "grad_norm": 0.002385153272099315, "kl": 0.23876953125, "learning_rate": 5.1080933007188465e-08, "loss": 0.00023865493130870163, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5746, "train_speed(iter/s)": 0.027016 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.0, "completions/mean_length": 92.28125381469727, "completions/min_length": 40.75, "epoch": 8.564408041697693, "grad_norm": 0.0025593972268073174, "kl": 0.26904296875, "learning_rate": 5.097684863991575e-08, "loss": 0.0002688373497221619, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5747, "train_speed(iter/s)": 0.027017 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.25, "completions/mean_length": 86.63541984558105, "completions/min_length": 40.75, "epoch": 8.56589724497394, "grad_norm": 1.117916739047961, "kl": 0.2998046875, "learning_rate": 5.087286472745112e-08, "loss": -0.009230058640241623, "memory(GiB)": 112.53, "reward": 1.8958333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5748, "train_speed(iter/s)": 0.027016 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.75, "completions/mean_length": 85.44791984558105, "completions/min_length": 40.5, "epoch": 8.567386448250186, "grad_norm": 0.0027466081060842275, "kl": 0.30419921875, "learning_rate": 5.076898129305768e-08, "loss": 0.0003038665745407343, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5749, "train_speed(iter/s)": 0.027018 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 231.0, "completions/mean_length": 99.79166984558105, "completions/min_length": 44.25, "epoch": 8.568875651526433, "grad_norm": 0.002895879377038373, "kl": 0.282958984375, "learning_rate": 5.0665198359976126e-08, "loss": 0.0002825478441081941, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5750, "train_speed(iter/s)": 0.027017 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.75, "completions/mean_length": 84.41666793823242, "completions/min_length": 39.5, "epoch": 8.57036485480268, "grad_norm": 0.002836422056881531, "kl": 0.285888671875, "learning_rate": 5.056151595142449e-08, "loss": 0.00028599228244274855, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5751, "train_speed(iter/s)": 0.027017 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 249.75, "completions/mean_length": 103.57291984558105, "completions/min_length": 35.25, "epoch": 8.571854058078928, "grad_norm": 0.002395906744605068, "kl": 0.2744140625, "learning_rate": 5.045793409059873e-08, "loss": 0.0002745245583355427, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5752, "train_speed(iter/s)": 0.027016 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.25, "completions/mean_length": 100.66666793823242, "completions/min_length": 40.0, "epoch": 8.573343261355175, "grad_norm": 0.677508493060405, "kl": 0.275390625, "learning_rate": 5.03544528006718e-08, "loss": -0.019521869719028473, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5753, "train_speed(iter/s)": 0.027015 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 191.25, "completions/mean_length": 86.15625190734863, "completions/min_length": 34.75, "epoch": 8.574832464631422, "grad_norm": 1.4347014472691908, "kl": 0.291015625, "learning_rate": 5.0251072104794513e-08, "loss": -0.007545706816017628, "memory(GiB)": 112.53, "reward": 1.7395833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5754, "train_speed(iter/s)": 0.027017 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 199.0, "completions/mean_length": 84.70833587646484, "completions/min_length": 38.25, "epoch": 8.576321667907669, "grad_norm": 0.003371791914491742, "kl": 0.304931640625, "learning_rate": 5.014779202609493e-08, "loss": 0.0003045608173124492, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5755, "train_speed(iter/s)": 0.027018 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.5, "completions/mean_length": 95.73958396911621, "completions/min_length": 39.25, "epoch": 8.577810871183917, "grad_norm": 1.2949830611169306, "kl": 0.28125, "learning_rate": 5.004461258767872e-08, "loss": 0.012337497435510159, "memory(GiB)": 112.53, "reward": 1.7187500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500074505806, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5756, "train_speed(iter/s)": 0.027019 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 243.5, "completions/mean_length": 103.5, "completions/min_length": 37.0, "epoch": 8.579300074460164, "grad_norm": 0.004090175350093375, "kl": 0.25634765625, "learning_rate": 4.994153381262917e-08, "loss": 0.00025632287724874914, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5757, "train_speed(iter/s)": 0.027017 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 248.75, "completions/mean_length": 104.76041984558105, "completions/min_length": 48.5, "epoch": 8.580789277736411, "grad_norm": 0.0025995401060360606, "kl": 0.23486328125, "learning_rate": 4.983855572400686e-08, "loss": 0.00023480963136535138, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5758, "train_speed(iter/s)": 0.027018 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.75, "completions/mean_length": 94.56250190734863, "completions/min_length": 33.5, "epoch": 8.582278481012658, "grad_norm": 0.0031396470999055202, "kl": 0.28466796875, "learning_rate": 4.9735678344849876e-08, "loss": 0.000284741137875244, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5759, "train_speed(iter/s)": 0.027017 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 233.25, "completions/mean_length": 96.13541984558105, "completions/min_length": 35.0, "epoch": 8.583767684288905, "grad_norm": 0.0029940300314312616, "kl": 0.276123046875, "learning_rate": 4.96329016981738e-08, "loss": 0.0002763083321042359, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5760, "train_speed(iter/s)": 0.027018 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 270.0, "completions/mean_length": 103.85416984558105, "completions/min_length": 42.25, "epoch": 8.585256887565153, "grad_norm": 0.02231867289472599, "kl": 0.283447265625, "learning_rate": 4.953022580697153e-08, "loss": 0.00028315396048128605, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5761, "train_speed(iter/s)": 0.027017 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 309.0, "completions/mean_length": 111.89583778381348, "completions/min_length": 47.75, "epoch": 8.5867460908414, "grad_norm": 0.0023666537214253845, "kl": 0.2333984375, "learning_rate": 4.942765069421384e-08, "loss": 0.00023337930906563997, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5762, "train_speed(iter/s)": 0.027016 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.25, "completions/mean_length": 90.41666984558105, "completions/min_length": 36.0, "epoch": 8.588235294117647, "grad_norm": 0.002394091308430007, "kl": 0.2900390625, "learning_rate": 4.932517638284861e-08, "loss": 0.00029035285115242004, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5763, "train_speed(iter/s)": 0.027016 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 241.25, "completions/mean_length": 99.46875381469727, "completions/min_length": 42.25, "epoch": 8.589724497393894, "grad_norm": 0.0024220398783239487, "kl": 0.2451171875, "learning_rate": 4.922280289580111e-08, "loss": 0.00024526624474674463, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5764, "train_speed(iter/s)": 0.027015 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 231.25, "completions/mean_length": 96.55208778381348, "completions/min_length": 40.5, "epoch": 8.591213700670142, "grad_norm": 0.0036578674985517225, "kl": 0.292724609375, "learning_rate": 4.912053025597429e-08, "loss": 0.00029303389601409435, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5765, "train_speed(iter/s)": 0.027014 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 223.0, "completions/mean_length": 99.20833778381348, "completions/min_length": 39.75, "epoch": 8.592702903946389, "grad_norm": 0.002284284487729479, "kl": 0.23193359375, "learning_rate": 4.9018358486248466e-08, "loss": 0.00023218055139295757, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5766, "train_speed(iter/s)": 0.027015 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 199.0, "completions/mean_length": 91.73958587646484, "completions/min_length": 40.75, "epoch": 8.594192107222636, "grad_norm": 1.6879940945361853, "kl": 0.268798828125, "learning_rate": 4.8916287609481136e-08, "loss": -0.002528980141505599, "memory(GiB)": 112.53, "reward": 1.7812500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7812500074505806, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5767, "train_speed(iter/s)": 0.027014 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 196.25, "completions/mean_length": 95.85416793823242, "completions/min_length": 40.0, "epoch": 8.595681310498883, "grad_norm": 0.018236431523387116, "kl": 0.24853515625, "learning_rate": 4.881431764850774e-08, "loss": 0.00024873518850654364, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5768, "train_speed(iter/s)": 0.027015 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 250.75, "completions/mean_length": 99.66666984558105, "completions/min_length": 43.5, "epoch": 8.597170513775131, "grad_norm": 0.0024200572627887124, "kl": 0.275146484375, "learning_rate": 4.871244862614077e-08, "loss": 0.0002755498280748725, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5769, "train_speed(iter/s)": 0.027013 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 260.5, "completions/mean_length": 100.69791984558105, "completions/min_length": 37.0, "epoch": 8.598659717051378, "grad_norm": 0.0027591329954338123, "kl": 0.28173828125, "learning_rate": 4.861068056517015e-08, "loss": 0.00028155819745734334, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5770, "train_speed(iter/s)": 0.027013 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.5, "completions/mean_length": 100.62500190734863, "completions/min_length": 42.75, "epoch": 8.600148920327625, "grad_norm": 0.002334928562853012, "kl": 0.259033203125, "learning_rate": 4.850901348836328e-08, "loss": 0.00025868212105706334, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5771, "train_speed(iter/s)": 0.027015 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 271.75, "completions/mean_length": 101.38541984558105, "completions/min_length": 37.75, "epoch": 8.601638123603871, "grad_norm": 0.0025902932582247424, "kl": 0.2783203125, "learning_rate": 4.840744741846492e-08, "loss": 0.0002783820382319391, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5772, "train_speed(iter/s)": 0.027012 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 273.75, "completions/mean_length": 104.98958396911621, "completions/min_length": 37.0, "epoch": 8.60312732688012, "grad_norm": 0.002235212144814635, "kl": 0.261962890625, "learning_rate": 4.8305982378197495e-08, "loss": 0.0002622719621285796, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5773, "train_speed(iter/s)": 0.02701 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 243.75, "completions/mean_length": 98.95833587646484, "completions/min_length": 41.75, "epoch": 8.604616530156367, "grad_norm": 0.0024281539948733826, "kl": 0.24462890625, "learning_rate": 4.820461839026046e-08, "loss": 0.0002445717982482165, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5774, "train_speed(iter/s)": 0.027012 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 223.5, "completions/mean_length": 99.26041984558105, "completions/min_length": 50.5, "epoch": 8.606105733432614, "grad_norm": 1.2375545949589246, "kl": 0.248779296875, "learning_rate": 4.81033554773308e-08, "loss": 0.014314322732388973, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5775, "train_speed(iter/s)": 0.027013 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 231.5, "completions/mean_length": 89.62500190734863, "completions/min_length": 36.25, "epoch": 8.60759493670886, "grad_norm": 0.0022687000525219007, "kl": 0.2783203125, "learning_rate": 4.800219366206298e-08, "loss": 0.00027829757891595364, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5776, "train_speed(iter/s)": 0.027012 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 257.5, "completions/mean_length": 91.38541793823242, "completions/min_length": 40.25, "epoch": 8.609084139985107, "grad_norm": 0.002341542830320037, "kl": 0.2822265625, "learning_rate": 4.79011329670887e-08, "loss": 0.00028200045926496387, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5777, "train_speed(iter/s)": 0.02701 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.75, "completions/mean_length": 99.70833778381348, "completions/min_length": 41.25, "epoch": 8.610573343261356, "grad_norm": 0.0023721096170279676, "kl": 0.267578125, "learning_rate": 4.7800173415017296e-08, "loss": 0.00026750826509669423, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5778, "train_speed(iter/s)": 0.02701 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.25, "completions/mean_length": 99.18750381469727, "completions/min_length": 44.5, "epoch": 8.612062546537603, "grad_norm": 0.002324464297022786, "kl": 0.26904296875, "learning_rate": 4.7699315028434996e-08, "loss": 0.0002682632184587419, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5779, "train_speed(iter/s)": 0.027011 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.0, "completions/mean_length": 96.09375381469727, "completions/min_length": 43.25, "epoch": 8.61355174981385, "grad_norm": 0.010918593522649879, "kl": 0.251708984375, "learning_rate": 4.7598557829905904e-08, "loss": 0.00025165514671243727, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5780, "train_speed(iter/s)": 0.027012 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 255.25, "completions/mean_length": 111.4687557220459, "completions/min_length": 43.25, "epoch": 8.615040953090096, "grad_norm": 0.002404350414980856, "kl": 0.2373046875, "learning_rate": 4.749790184197122e-08, "loss": 0.00023714249255135655, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5781, "train_speed(iter/s)": 0.027012 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 250.25, "completions/mean_length": 93.36458587646484, "completions/min_length": 40.5, "epoch": 8.616530156366345, "grad_norm": 0.0024516975421831973, "kl": 0.28466796875, "learning_rate": 4.7397347087149485e-08, "loss": 0.0002848133444786072, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5782, "train_speed(iter/s)": 0.027011 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 294.75, "completions/mean_length": 107.72916793823242, "completions/min_length": 37.5, "epoch": 8.618019359642592, "grad_norm": 0.002871394073526438, "kl": 0.25, "learning_rate": 4.729689358793692e-08, "loss": 0.0002497559180483222, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5783, "train_speed(iter/s)": 0.027011 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.25, "completions/mean_length": 92.94791984558105, "completions/min_length": 43.25, "epoch": 8.619508562918838, "grad_norm": 0.0026200546584061, "kl": 0.2607421875, "learning_rate": 4.719654136680645e-08, "loss": 0.00026052410248667, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5784, "train_speed(iter/s)": 0.027011 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.0, "completions/mean_length": 99.28125, "completions/min_length": 46.0, "epoch": 8.620997766195085, "grad_norm": 0.0025008362010349506, "kl": 0.2705078125, "learning_rate": 4.7096290446209056e-08, "loss": 0.0002702806086745113, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5785, "train_speed(iter/s)": 0.02701 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 223.25, "completions/mean_length": 91.40625, "completions/min_length": 38.25, "epoch": 8.622486969471332, "grad_norm": 2.1680487473629264, "kl": 0.26513671875, "learning_rate": 4.699614084857256e-08, "loss": -0.0020519939716905355, "memory(GiB)": 112.53, "reward": 1.9270833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9270833432674408, "rewards/CineAccuracyORM/std": 0.11607640981674194, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5786, "train_speed(iter/s)": 0.02701 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 266.0, "completions/mean_length": 91.42708778381348, "completions/min_length": 34.75, "epoch": 8.62397617274758, "grad_norm": 0.05671951147930643, "kl": 0.292236328125, "learning_rate": 4.6896092596302395e-08, "loss": 0.00029258077847771347, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5787, "train_speed(iter/s)": 0.027009 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.25, "completions/mean_length": 107.78125381469727, "completions/min_length": 42.75, "epoch": 8.625465376023827, "grad_norm": 0.10978925668201435, "kl": 0.2841796875, "learning_rate": 4.679614571178109e-08, "loss": 0.0002837981446646154, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5788, "train_speed(iter/s)": 0.027009 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.75, "completions/mean_length": 99.54166984558105, "completions/min_length": 49.5, "epoch": 8.626954579300074, "grad_norm": 0.0028039362151889916, "kl": 0.251220703125, "learning_rate": 4.669630021736853e-08, "loss": 0.0002513169893063605, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5789, "train_speed(iter/s)": 0.02701 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 233.0, "completions/mean_length": 92.23958587646484, "completions/min_length": 44.5, "epoch": 8.628443782576321, "grad_norm": 0.004363356869391591, "kl": 0.27880859375, "learning_rate": 4.659655613540231e-08, "loss": 0.00027857060194946826, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5790, "train_speed(iter/s)": 0.02701 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 281.25, "completions/mean_length": 105.86458587646484, "completions/min_length": 43.75, "epoch": 8.62993298585257, "grad_norm": 0.0025677163731106446, "kl": 0.2353515625, "learning_rate": 4.649691348819684e-08, "loss": 0.00023550991318188608, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5791, "train_speed(iter/s)": 0.027009 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.75, "completions/mean_length": 92.18750190734863, "completions/min_length": 37.75, "epoch": 8.631422189128816, "grad_norm": 0.0023376383936053145, "kl": 0.2734375, "learning_rate": 4.639737229804402e-08, "loss": 0.0002732086868491024, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5792, "train_speed(iter/s)": 0.027009 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.25, "completions/mean_length": 96.23958396911621, "completions/min_length": 42.0, "epoch": 8.632911392405063, "grad_norm": 0.0027778765364554142, "kl": 0.25830078125, "learning_rate": 4.629793258721315e-08, "loss": 0.00025827327044680715, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5793, "train_speed(iter/s)": 0.027008 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.0, "completions/mean_length": 89.25000190734863, "completions/min_length": 39.25, "epoch": 8.63440059568131, "grad_norm": 1.223325997519472, "kl": 0.2783203125, "learning_rate": 4.619859437795054e-08, "loss": -0.010675022378563881, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5794, "train_speed(iter/s)": 0.02701 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 247.75, "completions/mean_length": 104.34375190734863, "completions/min_length": 46.5, "epoch": 8.635889798957558, "grad_norm": 1.417712775743428, "kl": 0.2392578125, "learning_rate": 4.6099357692480245e-08, "loss": 0.016753219068050385, "memory(GiB)": 112.53, "reward": 1.791666716337204, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7916666865348816, "rewards/CineAccuracyORM/std": 0.33594508469104767, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5795, "train_speed(iter/s)": 0.027008 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 296.5, "completions/mean_length": 110.57292175292969, "completions/min_length": 43.0, "epoch": 8.637379002233805, "grad_norm": 0.002453101967257236, "kl": 0.2509765625, "learning_rate": 4.6000222553003234e-08, "loss": 0.00025076360907405615, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5796, "train_speed(iter/s)": 0.027008 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 201.75, "completions/mean_length": 88.58333587646484, "completions/min_length": 37.75, "epoch": 8.638868205510052, "grad_norm": 0.0021363462822595047, "kl": 0.26708984375, "learning_rate": 4.590118898169793e-08, "loss": 0.0002672125119715929, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5797, "train_speed(iter/s)": 0.027008 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 270.5, "completions/mean_length": 97.11458587646484, "completions/min_length": 36.75, "epoch": 8.640357408786299, "grad_norm": 0.7778318432602628, "kl": 0.30224609375, "learning_rate": 4.580225700071988e-08, "loss": -0.00786504615098238, "memory(GiB)": 112.53, "reward": 1.729166716337204, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.729166679084301, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5798, "train_speed(iter/s)": 0.027007 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 229.0, "completions/mean_length": 92.27083587646484, "completions/min_length": 44.25, "epoch": 8.641846612062547, "grad_norm": 0.002842992066337205, "kl": 0.29443359375, "learning_rate": 4.570342663220206e-08, "loss": 0.00029421329963952303, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5799, "train_speed(iter/s)": 0.027007 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 301.5, "completions/mean_length": 109.06250381469727, "completions/min_length": 40.5, "epoch": 8.643335815338794, "grad_norm": 1.3895053551594259, "kl": 0.2607421875, "learning_rate": 4.5604697898254555e-08, "loss": 0.01219988614320755, "memory(GiB)": 112.53, "reward": 1.8541666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.12590253353118896, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5800, "train_speed(iter/s)": 0.027005 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 246.75, "completions/mean_length": 92.32291793823242, "completions/min_length": 38.25, "epoch": 8.644825018615041, "grad_norm": 0.0022521376986313134, "kl": 0.298828125, "learning_rate": 4.550607082096497e-08, "loss": 0.00029907625867053866, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5801, "train_speed(iter/s)": 0.027004 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 225.75, "completions/mean_length": 106.84375190734863, "completions/min_length": 45.75, "epoch": 8.646314221891288, "grad_norm": 0.8488271235977055, "kl": 0.25390625, "learning_rate": 4.5407545422397996e-08, "loss": 0.03279707580804825, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.2950507290661335, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5802, "train_speed(iter/s)": 0.027004 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 257.5, "completions/mean_length": 97.42708396911621, "completions/min_length": 40.0, "epoch": 8.647803425167535, "grad_norm": 0.002514794543612321, "kl": 0.272216796875, "learning_rate": 4.5309121724595553e-08, "loss": 0.0002716336166486144, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5803, "train_speed(iter/s)": 0.027005 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.75, "completions/mean_length": 94.22916984558105, "completions/min_length": 42.75, "epoch": 8.649292628443783, "grad_norm": 2.001205419333683, "kl": 0.255615234375, "learning_rate": 4.521079974957681e-08, "loss": -0.0058191027492284775, "memory(GiB)": 112.53, "reward": 1.8020833432674408, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833358168602, "rewards/CineAccuracyORM/std": 0.20484384894371033, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5804, "train_speed(iter/s)": 0.027005 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 268.75, "completions/mean_length": 101.54166984558105, "completions/min_length": 35.5, "epoch": 8.65078183172003, "grad_norm": 0.002259726047087987, "kl": 0.248046875, "learning_rate": 4.511257951933817e-08, "loss": 0.0002483565185684711, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5805, "train_speed(iter/s)": 0.027005 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 236.5, "completions/mean_length": 110.05208396911621, "completions/min_length": 44.5, "epoch": 8.652271034996277, "grad_norm": 0.004248774927989311, "kl": 0.23681640625, "learning_rate": 4.5014461055853544e-08, "loss": 0.00023703568149358034, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5806, "train_speed(iter/s)": 0.027007 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 201.0, "completions/mean_length": 86.06250381469727, "completions/min_length": 40.25, "epoch": 8.653760238272524, "grad_norm": 0.002568745948132562, "kl": 0.30078125, "learning_rate": 4.491644438107367e-08, "loss": 0.00030148992664180696, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5807, "train_speed(iter/s)": 0.027005 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 256.75, "completions/mean_length": 103.84375381469727, "completions/min_length": 47.5, "epoch": 8.655249441548772, "grad_norm": 0.002445292725991318, "kl": 0.251708984375, "learning_rate": 4.4818529516926716e-08, "loss": 0.000251935824053362, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5808, "train_speed(iter/s)": 0.027005 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 261.5, "completions/mean_length": 103.01041984558105, "completions/min_length": 44.0, "epoch": 8.656738644825019, "grad_norm": 0.8724372007511753, "kl": 0.256591796875, "learning_rate": 4.472071648531806e-08, "loss": -0.008744766004383564, "memory(GiB)": 112.53, "reward": 1.8125000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5809, "train_speed(iter/s)": 0.027003 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 236.5, "completions/mean_length": 95.67708396911621, "completions/min_length": 37.75, "epoch": 8.658227848101266, "grad_norm": 0.0026185611191936573, "kl": 0.2646484375, "learning_rate": 4.462300530813024e-08, "loss": 0.0002644937194418162, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5810, "train_speed(iter/s)": 0.027005 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.0, "completions/mean_length": 89.29166793823242, "completions/min_length": 36.5, "epoch": 8.659717051377513, "grad_norm": 0.0020976310207377834, "kl": 0.282470703125, "learning_rate": 4.452539600722333e-08, "loss": 0.0002825524134095758, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5811, "train_speed(iter/s)": 0.027005 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 195.0, "completions/mean_length": 83.81250190734863, "completions/min_length": 40.5, "epoch": 8.66120625465376, "grad_norm": 0.0030497053749080154, "kl": 0.28369140625, "learning_rate": 4.442788860443391e-08, "loss": 0.0002840826928149909, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5812, "train_speed(iter/s)": 0.027006 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.5, "completions/mean_length": 98.94791793823242, "completions/min_length": 45.75, "epoch": 8.662695457930008, "grad_norm": 0.002642839219222837, "kl": 0.253173828125, "learning_rate": 4.43304831215765e-08, "loss": 0.0002531202044337988, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5813, "train_speed(iter/s)": 0.027008 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 248.0, "completions/mean_length": 97.47916793823242, "completions/min_length": 43.0, "epoch": 8.664184661206255, "grad_norm": 0.0025379171956126326, "kl": 0.25830078125, "learning_rate": 4.423317958044248e-08, "loss": 0.0002582021988928318, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5814, "train_speed(iter/s)": 0.027006 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.5, "completions/mean_length": 93.72916793823242, "completions/min_length": 47.0, "epoch": 8.665673864482502, "grad_norm": 1.0370606083333864, "kl": 0.27685546875, "learning_rate": 4.4135978002800276e-08, "loss": 0.005522271152585745, "memory(GiB)": 112.53, "reward": 1.5937500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500074505806, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5815, "train_speed(iter/s)": 0.027006 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 244.0, "completions/mean_length": 93.98958396911621, "completions/min_length": 36.5, "epoch": 8.667163067758748, "grad_norm": 0.002627563781014105, "kl": 0.281982421875, "learning_rate": 4.4038878410395995e-08, "loss": 0.00028172775637358427, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5816, "train_speed(iter/s)": 0.027006 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 214.5, "completions/mean_length": 94.5, "completions/min_length": 41.75, "epoch": 8.668652271034997, "grad_norm": 0.060410689282426835, "kl": 0.3076171875, "learning_rate": 4.394188082495231e-08, "loss": 0.0003076210559811443, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5817, "train_speed(iter/s)": 0.027007 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 223.25, "completions/mean_length": 102.85416984558105, "completions/min_length": 42.25, "epoch": 8.670141474311244, "grad_norm": 0.0025396170855434405, "kl": 0.2451171875, "learning_rate": 4.3844985268169544e-08, "loss": 0.00024483082233928144, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5818, "train_speed(iter/s)": 0.027007 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.0, "completions/mean_length": 93.03125381469727, "completions/min_length": 46.75, "epoch": 8.67163067758749, "grad_norm": 0.0025431292282691582, "kl": 0.2841796875, "learning_rate": 4.3748191761725004e-08, "loss": 0.00028408324578776956, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5819, "train_speed(iter/s)": 0.027008 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 220.75, "completions/mean_length": 89.01041984558105, "completions/min_length": 40.5, "epoch": 8.673119880863737, "grad_norm": 0.0023088827287059696, "kl": 0.27880859375, "learning_rate": 4.365150032727311e-08, "loss": 0.00027870957273989916, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5820, "train_speed(iter/s)": 0.027009 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 280.0, "completions/mean_length": 104.14583396911621, "completions/min_length": 35.25, "epoch": 8.674609084139986, "grad_norm": 0.003584239022351707, "kl": 0.244873046875, "learning_rate": 4.3554910986445815e-08, "loss": 0.000244611146626994, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5821, "train_speed(iter/s)": 0.02701 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 280.5, "completions/mean_length": 108.82291984558105, "completions/min_length": 45.75, "epoch": 8.676098287416233, "grad_norm": 0.0026419397512636392, "kl": 0.26025390625, "learning_rate": 4.345842376085152e-08, "loss": 0.0002605257905088365, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5822, "train_speed(iter/s)": 0.027009 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.75, "completions/mean_length": 90.03125381469727, "completions/min_length": 39.75, "epoch": 8.67758749069248, "grad_norm": 1.4091168334183997, "kl": 0.2880859375, "learning_rate": 4.3362038672076605e-08, "loss": -0.01732386276125908, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833507180214, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5823, "train_speed(iter/s)": 0.027007 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 337.25, "completions/mean_length": 117.1250057220459, "completions/min_length": 39.5, "epoch": 8.679076693968726, "grad_norm": 0.00244686873259656, "kl": 0.2646484375, "learning_rate": 4.326575574168384e-08, "loss": 0.00026448260177858174, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5824, "train_speed(iter/s)": 0.027004 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.5, "completions/mean_length": 90.44791984558105, "completions/min_length": 42.0, "epoch": 8.680565897244975, "grad_norm": 0.0027350074232231894, "kl": 0.30615234375, "learning_rate": 4.316957499121376e-08, "loss": 0.0003059001755900681, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5825, "train_speed(iter/s)": 0.027004 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.75, "completions/mean_length": 95.85416984558105, "completions/min_length": 44.0, "epoch": 8.682055100521222, "grad_norm": 0.002607646796132005, "kl": 0.248779296875, "learning_rate": 4.307349644218367e-08, "loss": 0.0002486296580173075, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5826, "train_speed(iter/s)": 0.027004 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.0, "completions/mean_length": 89.90625190734863, "completions/min_length": 37.25, "epoch": 8.683544303797468, "grad_norm": 0.002562292434630394, "kl": 0.259033203125, "learning_rate": 4.297752011608813e-08, "loss": 0.00025896503939293325, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5827, "train_speed(iter/s)": 0.027004 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 267.5, "completions/mean_length": 110.73958587646484, "completions/min_length": 44.75, "epoch": 8.685033507073715, "grad_norm": 0.0074782814985333386, "kl": 0.25244140625, "learning_rate": 4.288164603439892e-08, "loss": 0.00025263038696721196, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5828, "train_speed(iter/s)": 0.027003 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 266.5, "completions/mean_length": 100.55208778381348, "completions/min_length": 41.25, "epoch": 8.686522710349962, "grad_norm": 0.002772043988098712, "kl": 0.29931640625, "learning_rate": 4.278587421856478e-08, "loss": 0.0002985287574119866, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5829, "train_speed(iter/s)": 0.027003 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 241.75, "completions/mean_length": 104.40625190734863, "completions/min_length": 46.75, "epoch": 8.68801191362621, "grad_norm": 0.0023812169752360636, "kl": 0.2412109375, "learning_rate": 4.2690204690011666e-08, "loss": 0.0002411619934719056, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5830, "train_speed(iter/s)": 0.027004 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 260.25, "completions/mean_length": 110.93750381469727, "completions/min_length": 43.0, "epoch": 8.689501116902457, "grad_norm": 0.0027348433487051713, "kl": 0.251708984375, "learning_rate": 4.259463747014258e-08, "loss": 0.0002512378559913486, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5831, "train_speed(iter/s)": 0.027004 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 258.0, "completions/mean_length": 107.08333587646484, "completions/min_length": 46.5, "epoch": 8.690990320178704, "grad_norm": 0.009966507136778833, "kl": 0.26513671875, "learning_rate": 4.249917258033764e-08, "loss": 0.0002648982626851648, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5832, "train_speed(iter/s)": 0.027004 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 293.0, "completions/mean_length": 102.61458587646484, "completions/min_length": 39.0, "epoch": 8.692479523454951, "grad_norm": 0.002438605626356326, "kl": 0.267333984375, "learning_rate": 4.2403810041954256e-08, "loss": 0.0002676776493899524, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5833, "train_speed(iter/s)": 0.027004 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.25, "completions/mean_length": 94.61458587646484, "completions/min_length": 31.5, "epoch": 8.6939687267312, "grad_norm": 0.0024136355180805528, "kl": 0.2783203125, "learning_rate": 4.2308549876326705e-08, "loss": 0.0002788849815260619, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5834, "train_speed(iter/s)": 0.027005 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.75, "completions/mean_length": 90.39583778381348, "completions/min_length": 38.25, "epoch": 8.695457930007446, "grad_norm": 0.002297410546170987, "kl": 0.280517578125, "learning_rate": 4.221339210476649e-08, "loss": 0.00028019442106597126, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5835, "train_speed(iter/s)": 0.027004 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 333.75, "completions/mean_length": 104.73958587646484, "completions/min_length": 34.75, "epoch": 8.696947133283693, "grad_norm": 0.0034382183184640987, "kl": 0.272216796875, "learning_rate": 4.211833674856213e-08, "loss": 0.0002723446814343333, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5836, "train_speed(iter/s)": 0.027003 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.5, "completions/mean_length": 100.78125190734863, "completions/min_length": 47.25, "epoch": 8.69843633655994, "grad_norm": 0.002775665054826591, "kl": 0.25927734375, "learning_rate": 4.20233838289793e-08, "loss": 0.000259211752563715, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5837, "train_speed(iter/s)": 0.027005 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.5, "completions/mean_length": 87.62500381469727, "completions/min_length": 38.25, "epoch": 8.699925539836187, "grad_norm": 1.7123283693906581, "kl": 0.28564453125, "learning_rate": 4.192853336726065e-08, "loss": 0.020201006904244423, "memory(GiB)": 112.53, "reward": 1.8125000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000074505806, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5838, "train_speed(iter/s)": 0.027005 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 163.0, "completions/mean_length": 81.78125190734863, "completions/min_length": 37.5, "epoch": 8.701414743112435, "grad_norm": 1.8863594075558996, "kl": 0.2939453125, "learning_rate": 4.183378538462612e-08, "loss": 0.007355479057878256, "memory(GiB)": 112.53, "reward": 1.6875000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000074505806, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5839, "train_speed(iter/s)": 0.027006 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 184.5, "completions/mean_length": 91.59375381469727, "completions/min_length": 39.0, "epoch": 8.702903946388682, "grad_norm": 1.5885052971492226, "kl": 0.2490234375, "learning_rate": 4.1739139902272514e-08, "loss": 0.00875011645257473, "memory(GiB)": 112.53, "reward": 1.7812500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7812500074505806, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5840, "train_speed(iter/s)": 0.027008 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 252.75, "completions/mean_length": 103.21875190734863, "completions/min_length": 44.5, "epoch": 8.704393149664929, "grad_norm": 0.0025191940688607632, "kl": 0.243896484375, "learning_rate": 4.1644596941373846e-08, "loss": 0.0002441596006974578, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5841, "train_speed(iter/s)": 0.027007 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 299.0, "completions/mean_length": 100.98958587646484, "completions/min_length": 46.0, "epoch": 8.705882352941176, "grad_norm": 1.5401130909479601, "kl": 0.25830078125, "learning_rate": 4.155015652308102e-08, "loss": -0.005919103976339102, "memory(GiB)": 112.53, "reward": 1.9479166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.9479166716337204, "rewards/CineAccuracyORM/std": 0.1037127822637558, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5842, "train_speed(iter/s)": 0.027004 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 253.25, "completions/mean_length": 97.01041984558105, "completions/min_length": 41.5, "epoch": 8.707371556217424, "grad_norm": 0.7124776562254821, "kl": 0.29052734375, "learning_rate": 4.14558186685221e-08, "loss": 0.000413828733144328, "memory(GiB)": 112.53, "reward": 1.90625, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.12363383919000626, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5843, "train_speed(iter/s)": 0.027003 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.5, "completions/mean_length": 102.32292175292969, "completions/min_length": 42.5, "epoch": 8.708860759493671, "grad_norm": 0.9244194282144441, "kl": 0.3046875, "learning_rate": 4.1361583398802414e-08, "loss": 0.0016820214223116636, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5844, "train_speed(iter/s)": 0.027004 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 237.75, "completions/mean_length": 113.11458396911621, "completions/min_length": 42.5, "epoch": 8.710349962769918, "grad_norm": 0.0023973637649225862, "kl": 0.238037109375, "learning_rate": 4.1267450735003985e-08, "loss": 0.0002379315410507843, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5845, "train_speed(iter/s)": 0.027005 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.5, "completions/mean_length": 87.40625190734863, "completions/min_length": 38.25, "epoch": 8.711839166046165, "grad_norm": 0.002860633803957534, "kl": 0.27978515625, "learning_rate": 4.117342069818602e-08, "loss": 0.00027969456277787685, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5846, "train_speed(iter/s)": 0.027007 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.25, "completions/mean_length": 100.65625381469727, "completions/min_length": 41.0, "epoch": 8.713328369322413, "grad_norm": 0.002263074318116974, "kl": 0.25341796875, "learning_rate": 4.1079493309384874e-08, "loss": 0.0002528838813304901, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5847, "train_speed(iter/s)": 0.027006 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.75, "completions/mean_length": 102.29166984558105, "completions/min_length": 43.5, "epoch": 8.71481757259866, "grad_norm": 0.0024291813093863337, "kl": 0.25634765625, "learning_rate": 4.0985668589613666e-08, "loss": 0.00025576198822818696, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5848, "train_speed(iter/s)": 0.027006 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 265.5, "completions/mean_length": 103.12500190734863, "completions/min_length": 40.0, "epoch": 8.716306775874907, "grad_norm": 0.002350513026648419, "kl": 0.243896484375, "learning_rate": 4.089194655986306e-08, "loss": 0.00024361591204069555, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5849, "train_speed(iter/s)": 0.027005 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 269.75, "completions/mean_length": 98.17708396911621, "completions/min_length": 47.0, "epoch": 8.717795979151154, "grad_norm": 0.002429326938540079, "kl": 0.264404296875, "learning_rate": 4.079832724110005e-08, "loss": 0.00026423006784170866, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5850, "train_speed(iter/s)": 0.027002 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.25, "completions/mean_length": 96.98958587646484, "completions/min_length": 35.0, "epoch": 8.719285182427402, "grad_norm": 0.0023204204541826825, "kl": 0.26806640625, "learning_rate": 4.070481065426923e-08, "loss": 0.00026775788865052164, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5851, "train_speed(iter/s)": 0.027002 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 229.75, "completions/mean_length": 97.83333587646484, "completions/min_length": 41.25, "epoch": 8.720774385703649, "grad_norm": 0.002582199039931173, "kl": 0.275390625, "learning_rate": 4.061139682029191e-08, "loss": 0.00027491821674630046, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5852, "train_speed(iter/s)": 0.027003 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.0, "completions/mean_length": 91.27083396911621, "completions/min_length": 39.75, "epoch": 8.722263588979896, "grad_norm": 0.00292016556262972, "kl": 0.2783203125, "learning_rate": 4.051808576006649e-08, "loss": 0.00027859793044626713, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5853, "train_speed(iter/s)": 0.027004 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.25, "completions/mean_length": 101.70833587646484, "completions/min_length": 43.75, "epoch": 8.723752792256143, "grad_norm": 0.0021754821980829874, "kl": 0.236083984375, "learning_rate": 4.042487749446854e-08, "loss": 0.000235986546613276, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5854, "train_speed(iter/s)": 0.027003 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 241.25, "completions/mean_length": 96.98958778381348, "completions/min_length": 33.25, "epoch": 8.72524199553239, "grad_norm": 0.002302256134850069, "kl": 0.277099609375, "learning_rate": 4.0331772044350235e-08, "loss": 0.00027710641734302044, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5855, "train_speed(iter/s)": 0.027004 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.25, "completions/mean_length": 94.85416793823242, "completions/min_length": 43.75, "epoch": 8.726731198808638, "grad_norm": 0.8054913695162004, "kl": 0.2724609375, "learning_rate": 4.0238769430541166e-08, "loss": 0.020568547770380974, "memory(GiB)": 112.53, "reward": 1.8958333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5856, "train_speed(iter/s)": 0.027004 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 264.5, "completions/mean_length": 102.28125381469727, "completions/min_length": 46.5, "epoch": 8.728220402084885, "grad_norm": 0.0024595763232492258, "kl": 0.264892578125, "learning_rate": 4.01458696738477e-08, "loss": 0.00026479962980374694, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5857, "train_speed(iter/s)": 0.027004 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 274.75, "completions/mean_length": 94.67708587646484, "completions/min_length": 31.0, "epoch": 8.729709605361132, "grad_norm": 0.0024626045191344956, "kl": 0.2724609375, "learning_rate": 4.0053072795053154e-08, "loss": 0.000272014964139089, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5858, "train_speed(iter/s)": 0.027002 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 249.0, "completions/mean_length": 93.82291793823242, "completions/min_length": 36.0, "epoch": 8.731198808637378, "grad_norm": 0.0027440987574515684, "kl": 0.270263671875, "learning_rate": 3.996037881491815e-08, "loss": 0.00027042991132475436, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5859, "train_speed(iter/s)": 0.027002 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 282.25, "completions/mean_length": 111.68750190734863, "completions/min_length": 37.25, "epoch": 8.732688011913627, "grad_norm": 0.002508631476860193, "kl": 0.2421875, "learning_rate": 3.9867787754179705e-08, "loss": 0.000242515467107296, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5860, "train_speed(iter/s)": 0.027003 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.010416666666666666, "completions/max_length": 399.25, "completions/mean_length": 105.46875381469727, "completions/min_length": 44.75, "epoch": 8.734177215189874, "grad_norm": 1.410516607904646, "kl": 0.264892578125, "learning_rate": 3.977529963355253e-08, "loss": 0.0623023621737957, "memory(GiB)": 112.53, "reward": 1.8645833730697632, "reward_std": 0.1020546555519104, "rewards/CineAccuracyORM/mean": 0.8750000149011612, "rewards/CineAccuracyORM/std": 0.2155592292547226, "rewards/Format/mean": 0.9895833432674408, "rewards/Format/std": 0.05103103443980217, "step": 5861, "train_speed(iter/s)": 0.027004 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 206.75, "completions/mean_length": 91.87500190734863, "completions/min_length": 40.75, "epoch": 8.73566641846612, "grad_norm": 1.4167037306767434, "kl": 0.3583984375, "learning_rate": 3.968291447372751e-08, "loss": 0.02881987765431404, "memory(GiB)": 112.53, "reward": 1.8645833432674408, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8645833358168602, "rewards/CineAccuracyORM/std": 0.12724433839321136, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5862, "train_speed(iter/s)": 0.027004 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 248.5, "completions/mean_length": 103.25000381469727, "completions/min_length": 41.25, "epoch": 8.737155621742367, "grad_norm": 0.00248016327703993, "kl": 0.27001953125, "learning_rate": 3.9590632295373295e-08, "loss": 0.0002696415758691728, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5863, "train_speed(iter/s)": 0.027005 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 223.5, "completions/mean_length": 91.98958587646484, "completions/min_length": 42.5, "epoch": 8.738644825018614, "grad_norm": 0.0027530439753216296, "kl": 0.2705078125, "learning_rate": 3.949845311913491e-08, "loss": 0.00027036393294110894, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5864, "train_speed(iter/s)": 0.027006 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 240.5, "completions/mean_length": 96.98958396911621, "completions/min_length": 44.25, "epoch": 8.740134028294863, "grad_norm": 0.0026479385706775233, "kl": 0.259033203125, "learning_rate": 3.940637696563459e-08, "loss": 0.00025945284869521856, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5865, "train_speed(iter/s)": 0.027006 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.5, "completions/mean_length": 93.20833778381348, "completions/min_length": 50.5, "epoch": 8.74162323157111, "grad_norm": 0.002952798780160022, "kl": 0.275390625, "learning_rate": 3.9314403855471635e-08, "loss": 0.00027521763695403934, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5866, "train_speed(iter/s)": 0.027006 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.25, "completions/mean_length": 93.86458587646484, "completions/min_length": 38.25, "epoch": 8.743112434847356, "grad_norm": 0.0025701020258502715, "kl": 0.27294921875, "learning_rate": 3.922253380922186e-08, "loss": 0.00027266930555924773, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5867, "train_speed(iter/s)": 0.027007 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 252.5, "completions/mean_length": 98.88541984558105, "completions/min_length": 44.25, "epoch": 8.744601638123603, "grad_norm": 0.002382683949243561, "kl": 0.25439453125, "learning_rate": 3.913076684743849e-08, "loss": 0.00025408220244571567, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5868, "train_speed(iter/s)": 0.027007 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.25, "completions/mean_length": 87.45833396911621, "completions/min_length": 43.25, "epoch": 8.746090841399852, "grad_norm": 0.8321155999970966, "kl": 0.341796875, "learning_rate": 3.903910299065144e-08, "loss": -0.0063652703538537025, "memory(GiB)": 112.53, "reward": 1.8958333432674408, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.12590253353118896, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5869, "train_speed(iter/s)": 0.027007 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 243.0, "completions/mean_length": 102.53125190734863, "completions/min_length": 35.75, "epoch": 8.747580044676099, "grad_norm": 0.0023710901960010008, "kl": 0.258544921875, "learning_rate": 3.894754225936753e-08, "loss": 0.00025870848912745714, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5870, "train_speed(iter/s)": 0.027008 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.25, "completions/mean_length": 98.34375190734863, "completions/min_length": 45.0, "epoch": 8.749069247952345, "grad_norm": 2.394013823839927, "kl": 0.2880859375, "learning_rate": 3.8856084674070865e-08, "loss": 0.0023548887111246586, "memory(GiB)": 112.53, "reward": 1.9895833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9895833432674408, "rewards/CineAccuracyORM/std": 0.05103103443980217, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5871, "train_speed(iter/s)": 0.027009 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 286.5, "completions/mean_length": 103.23958778381348, "completions/min_length": 35.75, "epoch": 8.750558451228592, "grad_norm": 0.9107791956085315, "kl": 0.29638671875, "learning_rate": 3.876473025522187e-08, "loss": 0.00562415411695838, "memory(GiB)": 112.53, "reward": 1.8437500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5872, "train_speed(iter/s)": 0.02701 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 243.75, "completions/mean_length": 105.64583587646484, "completions/min_length": 45.0, "epoch": 8.75204765450484, "grad_norm": 0.0023572817706764876, "kl": 0.246826171875, "learning_rate": 3.867347902325846e-08, "loss": 0.0002464044082444161, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5873, "train_speed(iter/s)": 0.027009 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.0, "completions/mean_length": 95.18750190734863, "completions/min_length": 41.5, "epoch": 8.753536857781087, "grad_norm": 0.002490034171673948, "kl": 0.270751953125, "learning_rate": 3.8582330998595094e-08, "loss": 0.00027079670690000057, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5874, "train_speed(iter/s)": 0.027008 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 202.75, "completions/mean_length": 104.76041984558105, "completions/min_length": 37.75, "epoch": 8.755026061057334, "grad_norm": 0.002707341984502929, "kl": 0.261962890625, "learning_rate": 3.849128620162334e-08, "loss": 0.00026208252529613674, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5875, "train_speed(iter/s)": 0.027006 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.5, "completions/mean_length": 84.30208396911621, "completions/min_length": 36.5, "epoch": 8.756515264333581, "grad_norm": 0.0024287343002282863, "kl": 0.3056640625, "learning_rate": 3.8400344652711635e-08, "loss": 0.00030552406678907573, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5876, "train_speed(iter/s)": 0.027007 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.0, "completions/mean_length": 95.82291984558105, "completions/min_length": 44.0, "epoch": 8.75800446760983, "grad_norm": 1.4507072476798637, "kl": 0.27587890625, "learning_rate": 3.830950637220515e-08, "loss": -0.0011116629466414452, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5877, "train_speed(iter/s)": 0.027008 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.5, "completions/mean_length": 91.52083587646484, "completions/min_length": 36.0, "epoch": 8.759493670886076, "grad_norm": 0.0037077094444573316, "kl": 0.2685546875, "learning_rate": 3.821877138042623e-08, "loss": 0.00026886380510404706, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5878, "train_speed(iter/s)": 0.027008 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 276.25, "completions/mean_length": 98.39583587646484, "completions/min_length": 40.0, "epoch": 8.760982874162323, "grad_norm": 0.0025209969323835143, "kl": 0.257568359375, "learning_rate": 3.812813969767398e-08, "loss": 0.0002575437247287482, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5879, "train_speed(iter/s)": 0.027008 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 296.75, "completions/mean_length": 109.79166984558105, "completions/min_length": 43.75, "epoch": 8.76247207743857, "grad_norm": 0.8121256362687906, "kl": 0.24951171875, "learning_rate": 3.803761134422434e-08, "loss": 0.016190936788916588, "memory(GiB)": 112.53, "reward": 1.8437500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5880, "train_speed(iter/s)": 0.027008 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 233.75, "completions/mean_length": 89.09375190734863, "completions/min_length": 34.0, "epoch": 8.763961280714817, "grad_norm": 0.002639599343486212, "kl": 0.27783203125, "learning_rate": 3.794718634033012e-08, "loss": 0.0002776042092591524, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5881, "train_speed(iter/s)": 0.027007 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 243.75, "completions/mean_length": 102.29166984558105, "completions/min_length": 44.5, "epoch": 8.765450483991065, "grad_norm": 0.0023268718458863027, "kl": 0.236083984375, "learning_rate": 3.785686470622118e-08, "loss": 0.00023601154680363834, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5882, "train_speed(iter/s)": 0.027006 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 262.25, "completions/mean_length": 102.36458396911621, "completions/min_length": 41.75, "epoch": 8.766939687267312, "grad_norm": 0.002199759243542757, "kl": 0.2763671875, "learning_rate": 3.776664646210415e-08, "loss": 0.0002762613876257092, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5883, "train_speed(iter/s)": 0.027005 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 232.75, "completions/mean_length": 99.67708587646484, "completions/min_length": 38.5, "epoch": 8.768428890543559, "grad_norm": 2.169985894407187, "kl": 0.265869140625, "learning_rate": 3.767653162816242e-08, "loss": 0.0154982078820467, "memory(GiB)": 112.53, "reward": 1.2708333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.27083334140479565, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5884, "train_speed(iter/s)": 0.027005 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.0, "completions/mean_length": 92.17708587646484, "completions/min_length": 42.75, "epoch": 8.769918093819806, "grad_norm": 0.002916666991567178, "kl": 0.26806640625, "learning_rate": 3.758652022455644e-08, "loss": 0.00026846048422157764, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5885, "train_speed(iter/s)": 0.027007 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 244.25, "completions/mean_length": 97.20833587646484, "completions/min_length": 42.25, "epoch": 8.771407297096054, "grad_norm": 0.0022879835894750763, "kl": 0.26806640625, "learning_rate": 3.7496612271423314e-08, "loss": 0.0002679037570487708, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5886, "train_speed(iter/s)": 0.027006 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 246.75, "completions/mean_length": 89.44791984558105, "completions/min_length": 40.25, "epoch": 8.772896500372301, "grad_norm": 0.0025811702781548197, "kl": 0.2744140625, "learning_rate": 3.740680778887739e-08, "loss": 0.0002740334311965853, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5887, "train_speed(iter/s)": 0.027006 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 206.25, "completions/mean_length": 92.92708587646484, "completions/min_length": 43.0, "epoch": 8.774385703648548, "grad_norm": 0.002597219655988771, "kl": 0.251708984375, "learning_rate": 3.731710679700922e-08, "loss": 0.00025181006640195847, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5888, "train_speed(iter/s)": 0.027007 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.5, "completions/mean_length": 81.30208396911621, "completions/min_length": 39.0, "epoch": 8.775874906924795, "grad_norm": 0.0024752794804894914, "kl": 0.3017578125, "learning_rate": 3.722750931588686e-08, "loss": 0.00030212890123948455, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5889, "train_speed(iter/s)": 0.027006 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.75, "completions/mean_length": 97.26041984558105, "completions/min_length": 47.75, "epoch": 8.777364110201042, "grad_norm": 0.0025078477783267708, "kl": 0.2607421875, "learning_rate": 3.713801536555483e-08, "loss": 0.0002606030029710382, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5890, "train_speed(iter/s)": 0.027006 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.25, "completions/mean_length": 106.65625190734863, "completions/min_length": 49.5, "epoch": 8.77885331347729, "grad_norm": 0.002333746857322324, "kl": 0.2529296875, "learning_rate": 3.70486249660345e-08, "loss": 0.00025289723998866975, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5891, "train_speed(iter/s)": 0.027008 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 292.0, "completions/mean_length": 101.69791984558105, "completions/min_length": 37.5, "epoch": 8.780342516753537, "grad_norm": 0.0021701141003081497, "kl": 0.25244140625, "learning_rate": 3.695933813732444e-08, "loss": 0.00025210832245647907, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5892, "train_speed(iter/s)": 0.027008 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.0, "completions/mean_length": 90.93750381469727, "completions/min_length": 40.0, "epoch": 8.781831720029784, "grad_norm": 0.6791738044790079, "kl": 0.5283203125, "learning_rate": 3.687015489939943e-08, "loss": -0.011536694131791592, "memory(GiB)": 112.53, "reward": 1.8958333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5893, "train_speed(iter/s)": 0.027008 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 276.0, "completions/mean_length": 109.71875190734863, "completions/min_length": 43.0, "epoch": 8.78332092330603, "grad_norm": 1.698618380068111, "kl": 0.263671875, "learning_rate": 3.678107527221164e-08, "loss": -0.046919967979192734, "memory(GiB)": 112.53, "reward": 1.5416666865348816, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.5416666716337204, "rewards/CineAccuracyORM/std": 0.3712640330195427, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5894, "train_speed(iter/s)": 0.027008 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 294.5, "completions/mean_length": 104.59375190734863, "completions/min_length": 44.25, "epoch": 8.784810126582279, "grad_norm": 0.6467179128435226, "kl": 0.27099609375, "learning_rate": 3.669209927568978e-08, "loss": 0.014363875612616539, "memory(GiB)": 112.53, "reward": 1.8541666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5895, "train_speed(iter/s)": 0.027009 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.0, "completions/mean_length": 99.84375190734863, "completions/min_length": 35.25, "epoch": 8.786299329858526, "grad_norm": 1.7406931292809094, "kl": 0.254150390625, "learning_rate": 3.660322692973933e-08, "loss": 0.01649535447359085, "memory(GiB)": 112.53, "reward": 1.854166716337204, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.8541666865348816, "rewards/CineAccuracyORM/std": 0.27512967213988304, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5896, "train_speed(iter/s)": 0.02701 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 238.75, "completions/mean_length": 95.76041793823242, "completions/min_length": 42.25, "epoch": 8.787788533134773, "grad_norm": 0.0028902651253716618, "kl": 0.27685546875, "learning_rate": 3.6514458254242934e-08, "loss": 0.0002765278914012015, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5897, "train_speed(iter/s)": 0.02701 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.75, "completions/mean_length": 94.37500381469727, "completions/min_length": 42.25, "epoch": 8.78927773641102, "grad_norm": 1.30677352078214, "kl": 0.260498046875, "learning_rate": 3.642579326905942e-08, "loss": -0.011980671435594559, "memory(GiB)": 112.53, "reward": 1.7187500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500149011612, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5898, "train_speed(iter/s)": 0.027011 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 273.5, "completions/mean_length": 105.04166793823242, "completions/min_length": 43.5, "epoch": 8.790766939687268, "grad_norm": 0.0023123494598556267, "kl": 0.240966796875, "learning_rate": 3.6337231994025166e-08, "loss": 0.00024067366030067205, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5899, "train_speed(iter/s)": 0.027011 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 252.5, "completions/mean_length": 93.43750381469727, "completions/min_length": 40.0, "epoch": 8.792256142963515, "grad_norm": 0.0024669029099290457, "kl": 0.263916015625, "learning_rate": 3.624877444895269e-08, "loss": 0.00026411295402795076, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5900, "train_speed(iter/s)": 0.027012 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 310.75, "completions/mean_length": 112.06250381469727, "completions/min_length": 44.5, "epoch": 8.793745346239762, "grad_norm": 0.009076152786855734, "kl": 0.249267578125, "learning_rate": 3.616042065363173e-08, "loss": 0.00024930317886173725, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5901, "train_speed(iter/s)": 0.027011 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 332.25, "completions/mean_length": 103.04166984558105, "completions/min_length": 40.0, "epoch": 8.795234549516008, "grad_norm": 0.0031460750362850808, "kl": 0.261474609375, "learning_rate": 3.607217062782869e-08, "loss": 0.00026116412482224405, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5902, "train_speed(iter/s)": 0.027008 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 279.5, "completions/mean_length": 97.89583778381348, "completions/min_length": 41.0, "epoch": 8.796723752792257, "grad_norm": 1.1674172778536227, "kl": 0.258056640625, "learning_rate": 3.598402439128656e-08, "loss": 0.046455226838588715, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.822916679084301, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5903, "train_speed(iter/s)": 0.027008 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 268.5, "completions/mean_length": 108.37500381469727, "completions/min_length": 47.5, "epoch": 8.798212956068504, "grad_norm": 0.0022167669463748923, "kl": 0.263427734375, "learning_rate": 3.589598196372556e-08, "loss": 0.0002631905081216246, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5904, "train_speed(iter/s)": 0.027006 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.75, "completions/mean_length": 108.20833396911621, "completions/min_length": 42.75, "epoch": 8.79970215934475, "grad_norm": 0.0022997240345435303, "kl": 0.238525390625, "learning_rate": 3.5808043364842113e-08, "loss": 0.00023868115385994315, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5905, "train_speed(iter/s)": 0.027006 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 250.25, "completions/mean_length": 91.41666793823242, "completions/min_length": 33.5, "epoch": 8.801191362620997, "grad_norm": 0.0022111230058688676, "kl": 0.27783203125, "learning_rate": 3.572020861430997e-08, "loss": 0.00027753604808822274, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5906, "train_speed(iter/s)": 0.027007 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 225.75, "completions/mean_length": 92.09375190734863, "completions/min_length": 38.5, "epoch": 8.802680565897244, "grad_norm": 0.002516095206480577, "kl": 0.2763671875, "learning_rate": 3.56324777317793e-08, "loss": 0.00027697155019268394, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5907, "train_speed(iter/s)": 0.027006 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 231.75, "completions/mean_length": 98.91666984558105, "completions/min_length": 41.0, "epoch": 8.804169769173493, "grad_norm": 0.0034331756850600062, "kl": 0.267333984375, "learning_rate": 3.554485073687702e-08, "loss": 0.000267553492449224, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5908, "train_speed(iter/s)": 0.027005 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.75, "completions/mean_length": 95.73958587646484, "completions/min_length": 45.75, "epoch": 8.80565897244974, "grad_norm": 0.0024813498119562014, "kl": 0.27490234375, "learning_rate": 3.545732764920717e-08, "loss": 0.00027437281096354127, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5909, "train_speed(iter/s)": 0.027004 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 204.5, "completions/mean_length": 97.44791793823242, "completions/min_length": 42.5, "epoch": 8.807148175725986, "grad_norm": 0.7625304861637047, "kl": 0.26953125, "learning_rate": 3.536990848834997e-08, "loss": 0.007072823122143745, "memory(GiB)": 112.53, "reward": 1.7812500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7812500149011612, "rewards/CineAccuracyORM/std": 0.34448449313640594, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5910, "train_speed(iter/s)": 0.027006 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.75, "completions/mean_length": 86.56250381469727, "completions/min_length": 35.75, "epoch": 8.808637379002233, "grad_norm": 1.4049827219680913, "kl": 0.31396484375, "learning_rate": 3.528259327386296e-08, "loss": 0.00931800901889801, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833507180214, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5911, "train_speed(iter/s)": 0.027006 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 194.0, "completions/mean_length": 85.55208396911621, "completions/min_length": 41.5, "epoch": 8.810126582278482, "grad_norm": 0.0022673097516136137, "kl": 0.2900390625, "learning_rate": 3.519538202528011e-08, "loss": 0.00028982164803892374, "memory(GiB)": 112.53, "reward": 1.5833333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5912, "train_speed(iter/s)": 0.027007 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 206.25, "completions/mean_length": 94.93750381469727, "completions/min_length": 37.5, "epoch": 8.811615785554729, "grad_norm": 0.002279716590198821, "kl": 0.28955078125, "learning_rate": 3.5108274762112144e-08, "loss": 0.0002896194055210799, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5913, "train_speed(iter/s)": 0.027005 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 246.75, "completions/mean_length": 102.77083587646484, "completions/min_length": 42.75, "epoch": 8.813104988830975, "grad_norm": 0.0033224149322105077, "kl": 0.251220703125, "learning_rate": 3.5021271503846594e-08, "loss": 0.0002513024373911321, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5914, "train_speed(iter/s)": 0.027005 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 238.0, "completions/mean_length": 102.87500190734863, "completions/min_length": 39.25, "epoch": 8.814594192107222, "grad_norm": 0.002359266085777178, "kl": 0.274658203125, "learning_rate": 3.493437226994761e-08, "loss": 0.00027453177608549595, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5915, "train_speed(iter/s)": 0.027004 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 255.75, "completions/mean_length": 96.37500381469727, "completions/min_length": 33.5, "epoch": 8.816083395383469, "grad_norm": 0.002432410155469409, "kl": 0.271484375, "learning_rate": 3.4847577079856363e-08, "loss": 0.00027184729697182775, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5916, "train_speed(iter/s)": 0.027005 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 286.0, "completions/mean_length": 97.46875381469727, "completions/min_length": 40.5, "epoch": 8.817572598659718, "grad_norm": 0.8615343345118704, "kl": 0.421875, "learning_rate": 3.4760885952990425e-08, "loss": 0.0015039684949442744, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833507180214, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5917, "train_speed(iter/s)": 0.027002 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.5, "completions/mean_length": 95.57291984558105, "completions/min_length": 38.75, "epoch": 8.819061801935964, "grad_norm": 0.0023403732034841655, "kl": 0.2861328125, "learning_rate": 3.467429890874424e-08, "loss": 0.00028671935433521867, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5918, "train_speed(iter/s)": 0.026999 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 268.75, "completions/mean_length": 101.72917175292969, "completions/min_length": 40.75, "epoch": 8.820551005212211, "grad_norm": 0.0023889543008413275, "kl": 0.266845703125, "learning_rate": 3.458781596648891e-08, "loss": 0.00026735811843536794, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5919, "train_speed(iter/s)": 0.026999 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 181.25, "completions/mean_length": 87.36458396911621, "completions/min_length": 34.5, "epoch": 8.822040208488458, "grad_norm": 0.004198462474091446, "kl": 0.287109375, "learning_rate": 3.450143714557219e-08, "loss": 0.0002868907176889479, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5920, "train_speed(iter/s)": 0.026997 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 236.25, "completions/mean_length": 92.60416984558105, "completions/min_length": 36.5, "epoch": 8.823529411764707, "grad_norm": 0.002372500740316357, "kl": 0.2900390625, "learning_rate": 3.441516246531884e-08, "loss": 0.0002897043596021831, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5921, "train_speed(iter/s)": 0.026996 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 201.5, "completions/mean_length": 93.64583587646484, "completions/min_length": 42.0, "epoch": 8.825018615040953, "grad_norm": 0.002878990997028433, "kl": 0.26611328125, "learning_rate": 3.432899194502997e-08, "loss": 0.0002658554585650563, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5922, "train_speed(iter/s)": 0.026997 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 255.5, "completions/mean_length": 90.42708587646484, "completions/min_length": 38.25, "epoch": 8.8265078183172, "grad_norm": 0.0023157404736638353, "kl": 0.26416015625, "learning_rate": 3.424292560398351e-08, "loss": 0.00026468682335689664, "memory(GiB)": 112.53, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5923, "train_speed(iter/s)": 0.026998 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 232.5, "completions/mean_length": 95.29166984558105, "completions/min_length": 41.75, "epoch": 8.827997021593447, "grad_norm": 0.0025651837681619147, "kl": 0.2568359375, "learning_rate": 3.415696346143415e-08, "loss": 0.00025655116769485176, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5924, "train_speed(iter/s)": 0.026997 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 271.75, "completions/mean_length": 96.47916984558105, "completions/min_length": 37.0, "epoch": 8.829486224869695, "grad_norm": 0.0022109269644731548, "kl": 0.27099609375, "learning_rate": 3.4071105536613184e-08, "loss": 0.000270749325864017, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5925, "train_speed(iter/s)": 0.026997 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 247.75, "completions/mean_length": 93.19791793823242, "completions/min_length": 47.75, "epoch": 8.830975428145942, "grad_norm": 0.0024925771791907947, "kl": 0.275390625, "learning_rate": 3.398535184872858e-08, "loss": 0.0002753163280431181, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5926, "train_speed(iter/s)": 0.026997 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 231.0, "completions/mean_length": 95.93750381469727, "completions/min_length": 45.25, "epoch": 8.832464631422189, "grad_norm": 0.002444479655292782, "kl": 0.28173828125, "learning_rate": 3.389970241696516e-08, "loss": 0.0002811271697282791, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5927, "train_speed(iter/s)": 0.026996 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 229.25, "completions/mean_length": 92.66666793823242, "completions/min_length": 37.5, "epoch": 8.833953834698436, "grad_norm": 0.002419041243719163, "kl": 0.28271484375, "learning_rate": 3.38141572604842e-08, "loss": 0.0002823050890583545, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5928, "train_speed(iter/s)": 0.026994 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 251.0, "completions/mean_length": 101.42708587646484, "completions/min_length": 37.0, "epoch": 8.835443037974684, "grad_norm": 0.002278640977065507, "kl": 0.26025390625, "learning_rate": 3.372871639842373e-08, "loss": 0.0002601437736302614, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5929, "train_speed(iter/s)": 0.026992 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.5, "completions/mean_length": 93.93750190734863, "completions/min_length": 41.75, "epoch": 8.836932241250931, "grad_norm": 0.002307815142668451, "kl": 0.2646484375, "learning_rate": 3.364337984989846e-08, "loss": 0.0002645435743033886, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5930, "train_speed(iter/s)": 0.026991 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.25, "completions/mean_length": 87.21875381469727, "completions/min_length": 46.0, "epoch": 8.838421444527178, "grad_norm": 0.0032898921358996765, "kl": 0.29736328125, "learning_rate": 3.355814763399972e-08, "loss": 0.0002972118672914803, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5931, "train_speed(iter/s)": 0.026991 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 251.25, "completions/mean_length": 96.18750190734863, "completions/min_length": 44.25, "epoch": 8.839910647803425, "grad_norm": 1.269310677238489, "kl": 0.283203125, "learning_rate": 3.347301976979566e-08, "loss": 0.007142854388803244, "memory(GiB)": 112.53, "reward": 1.6041667461395264, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6041666865348816, "rewards/CineAccuracyORM/std": 0.4870600998401642, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5932, "train_speed(iter/s)": 0.026988 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 278.5, "completions/mean_length": 106.42708396911621, "completions/min_length": 44.75, "epoch": 8.841399851079672, "grad_norm": 0.002378475027953801, "kl": 0.262451171875, "learning_rate": 3.338799627633093e-08, "loss": 0.00026234667166136205, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5933, "train_speed(iter/s)": 0.026987 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 254.75, "completions/mean_length": 107.58333587646484, "completions/min_length": 47.5, "epoch": 8.84288905435592, "grad_norm": 0.002286971132080346, "kl": 0.26611328125, "learning_rate": 3.33030771726267e-08, "loss": 0.00026596765383146703, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5934, "train_speed(iter/s)": 0.026987 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 243.25, "completions/mean_length": 98.45833587646484, "completions/min_length": 48.25, "epoch": 8.844378257632167, "grad_norm": 0.002177501382069581, "kl": 0.25, "learning_rate": 3.3218262477681283e-08, "loss": 0.0002498787653166801, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5935, "train_speed(iter/s)": 0.026987 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 241.0, "completions/mean_length": 103.65625190734863, "completions/min_length": 41.0, "epoch": 8.845867460908414, "grad_norm": 0.0024180131475630183, "kl": 0.264892578125, "learning_rate": 3.3133552210468874e-08, "loss": 0.0002648838562890887, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5936, "train_speed(iter/s)": 0.026989 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 258.5, "completions/mean_length": 102.40625190734863, "completions/min_length": 35.0, "epoch": 8.84735666418466, "grad_norm": 0.00237996044531409, "kl": 0.249755859375, "learning_rate": 3.3048946389941156e-08, "loss": 0.00024978912551887333, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5937, "train_speed(iter/s)": 0.02699 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 252.5, "completions/mean_length": 93.81250381469727, "completions/min_length": 42.25, "epoch": 8.84884586746091, "grad_norm": 0.004642723170914307, "kl": 0.2890625, "learning_rate": 3.2964445035025656e-08, "loss": 0.00028857061988674104, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5938, "train_speed(iter/s)": 0.026991 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 246.75, "completions/mean_length": 100.44791984558105, "completions/min_length": 32.0, "epoch": 8.850335070737156, "grad_norm": 0.005120532864887748, "kl": 0.27880859375, "learning_rate": 3.288004816462708e-08, "loss": 0.0002788783749565482, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5939, "train_speed(iter/s)": 0.026992 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 278.25, "completions/mean_length": 107.23958587646484, "completions/min_length": 42.25, "epoch": 8.851824274013403, "grad_norm": 0.0024004292049008037, "kl": 0.236328125, "learning_rate": 3.279575579762656e-08, "loss": 0.0002362878294661641, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5940, "train_speed(iter/s)": 0.026992 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 253.5, "completions/mean_length": 100.01041793823242, "completions/min_length": 47.75, "epoch": 8.85331347728965, "grad_norm": 1.3827991190499007, "kl": 0.260009765625, "learning_rate": 3.2711567952881824e-08, "loss": 0.0101751908659935, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5941, "train_speed(iter/s)": 0.026993 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 248.75, "completions/mean_length": 91.92708587646484, "completions/min_length": 37.75, "epoch": 8.854802680565896, "grad_norm": 1.3234699779674661, "kl": 0.27685546875, "learning_rate": 3.2627484649227375e-08, "loss": 0.0038626142777502537, "memory(GiB)": 112.53, "reward": 1.9791666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.9791666716337204, "rewards/CineAccuracyORM/std": 0.07058246433734894, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5942, "train_speed(iter/s)": 0.026993 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 208.0, "completions/mean_length": 95.83333778381348, "completions/min_length": 39.0, "epoch": 8.856291883842145, "grad_norm": 0.0025325033559317864, "kl": 0.27685546875, "learning_rate": 3.254350590547395e-08, "loss": 0.00027664125082083046, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5943, "train_speed(iter/s)": 0.026993 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 279.25, "completions/mean_length": 107.25000190734863, "completions/min_length": 46.75, "epoch": 8.857781087118392, "grad_norm": 1.7896375065623806, "kl": 0.25537109375, "learning_rate": 3.2459631740409445e-08, "loss": -0.0028685422148555517, "memory(GiB)": 112.53, "reward": 1.6562500596046448, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500223517418, "rewards/CineAccuracyORM/std": 0.4121886007487774, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5944, "train_speed(iter/s)": 0.026994 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 208.0, "completions/mean_length": 88.39583587646484, "completions/min_length": 34.0, "epoch": 8.859270290394639, "grad_norm": 1.7991967645766873, "kl": 0.28955078125, "learning_rate": 3.2375862172797865e-08, "loss": -0.002493698848411441, "memory(GiB)": 112.53, "reward": 1.9583333432674408, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.9583333432674408, "rewards/CineAccuracyORM/std": 0.09517337381839752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5945, "train_speed(iter/s)": 0.026995 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.0, "completions/mean_length": 85.78125190734863, "completions/min_length": 33.5, "epoch": 8.860759493670885, "grad_norm": 0.0026572381597376577, "kl": 0.2939453125, "learning_rate": 3.2292197221380044e-08, "loss": 0.0002939215046353638, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5946, "train_speed(iter/s)": 0.026995 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 277.75, "completions/mean_length": 104.43750190734863, "completions/min_length": 46.0, "epoch": 8.862248696947134, "grad_norm": 0.007488245156633661, "kl": 0.2734375, "learning_rate": 3.2208636904873575e-08, "loss": 0.00027339221560396254, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5947, "train_speed(iter/s)": 0.026995 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 202.5, "completions/mean_length": 89.52083778381348, "completions/min_length": 37.5, "epoch": 8.86373790022338, "grad_norm": 0.0023998091689925085, "kl": 0.271728515625, "learning_rate": 3.2125181241972166e-08, "loss": 0.00027174223214387894, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5948, "train_speed(iter/s)": 0.026996 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 186.5, "completions/mean_length": 82.52083587646484, "completions/min_length": 33.0, "epoch": 8.865227103499628, "grad_norm": 0.0025532926474764084, "kl": 0.27197265625, "learning_rate": 3.20418302513466e-08, "loss": 0.00027115154080092907, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5949, "train_speed(iter/s)": 0.026998 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.25, "completions/mean_length": 87.23958778381348, "completions/min_length": 38.75, "epoch": 8.866716306775874, "grad_norm": 0.0024539632369481165, "kl": 0.28125, "learning_rate": 3.1958583951643916e-08, "loss": 0.0002811271115206182, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5950, "train_speed(iter/s)": 0.026998 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 196.5, "completions/mean_length": 90.71875190734863, "completions/min_length": 43.25, "epoch": 8.868205510052123, "grad_norm": 0.0026123275659024328, "kl": 0.274658203125, "learning_rate": 3.187544236148798e-08, "loss": 0.00027480360586196184, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5951, "train_speed(iter/s)": 0.026997 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.0, "completions/mean_length": 94.14583396911621, "completions/min_length": 39.75, "epoch": 8.86969471332837, "grad_norm": 0.002766182695092216, "kl": 0.275390625, "learning_rate": 3.179240549947898e-08, "loss": 0.0002755403984338045, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5952, "train_speed(iter/s)": 0.026998 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.5, "completions/mean_length": 84.90625190734863, "completions/min_length": 40.5, "epoch": 8.871183916604616, "grad_norm": 1.0813046782420759, "kl": 0.30322265625, "learning_rate": 3.170947338419383e-08, "loss": 0.035565949976444244, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833507180214, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5953, "train_speed(iter/s)": 0.026999 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.75, "completions/mean_length": 97.68750190734863, "completions/min_length": 46.5, "epoch": 8.872673119880863, "grad_norm": 0.0034900638703920105, "kl": 0.2822265625, "learning_rate": 3.162664603418608e-08, "loss": 0.00028192094760015607, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5954, "train_speed(iter/s)": 0.026997 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 181.5, "completions/mean_length": 94.46875190734863, "completions/min_length": 39.5, "epoch": 8.874162323157112, "grad_norm": 0.0023180431762781888, "kl": 0.271484375, "learning_rate": 3.1543923467985677e-08, "loss": 0.00027108946233056486, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5955, "train_speed(iter/s)": 0.026998 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 261.5, "completions/mean_length": 93.43750190734863, "completions/min_length": 39.25, "epoch": 8.875651526433359, "grad_norm": 0.002635346686540439, "kl": 0.27197265625, "learning_rate": 3.146130570409916e-08, "loss": 0.00027201854391023517, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5956, "train_speed(iter/s)": 0.027 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.5, "completions/mean_length": 86.97916793823242, "completions/min_length": 37.25, "epoch": 8.877140729709605, "grad_norm": 0.002834131541360128, "kl": 0.2822265625, "learning_rate": 3.137879276100974e-08, "loss": 0.00028223474510014057, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5957, "train_speed(iter/s)": 0.027 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 259.25, "completions/mean_length": 104.46875190734863, "completions/min_length": 43.0, "epoch": 8.878629932985852, "grad_norm": 1.7697353539875658, "kl": 0.24560546875, "learning_rate": 3.129638465717693e-08, "loss": 0.016761217266321182, "memory(GiB)": 112.53, "reward": 1.791666716337204, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7916666865348816, "rewards/CineAccuracyORM/std": 0.33594508469104767, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5958, "train_speed(iter/s)": 0.027 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.0, "completions/mean_length": 88.01042175292969, "completions/min_length": 37.0, "epoch": 8.880119136262099, "grad_norm": 0.002502251920154546, "kl": 0.28515625, "learning_rate": 3.121408141103715e-08, "loss": 0.0002854404738172889, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5959, "train_speed(iter/s)": 0.027 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 250.25, "completions/mean_length": 89.80208587646484, "completions/min_length": 38.5, "epoch": 8.881608339538348, "grad_norm": 0.002464022334760226, "kl": 0.267578125, "learning_rate": 3.113188304100306e-08, "loss": 0.0002674353017937392, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5960, "train_speed(iter/s)": 0.027 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.75, "completions/mean_length": 81.12500190734863, "completions/min_length": 36.5, "epoch": 8.883097542814594, "grad_norm": 0.002287715306739567, "kl": 0.306640625, "learning_rate": 3.1049789565464e-08, "loss": 0.0003065178752876818, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5961, "train_speed(iter/s)": 0.027 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.25, "completions/mean_length": 89.97916984558105, "completions/min_length": 37.25, "epoch": 8.884586746090841, "grad_norm": 0.0027691877151603257, "kl": 0.26611328125, "learning_rate": 3.096780100278579e-08, "loss": 0.00026557070668786764, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5962, "train_speed(iter/s)": 0.027002 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.75, "completions/mean_length": 90.37500381469727, "completions/min_length": 43.75, "epoch": 8.886075949367088, "grad_norm": 0.002649935159878349, "kl": 0.2880859375, "learning_rate": 3.088591737131074e-08, "loss": 0.0002881901746150106, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5963, "train_speed(iter/s)": 0.027003 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.75, "completions/mean_length": 95.90625190734863, "completions/min_length": 34.5, "epoch": 8.887565152643337, "grad_norm": 0.8430712393015207, "kl": 0.27001953125, "learning_rate": 3.080413868935772e-08, "loss": -0.00535223912447691, "memory(GiB)": 112.53, "reward": 1.6875000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6875000074505806, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5964, "train_speed(iter/s)": 0.027003 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 251.25, "completions/mean_length": 92.31250190734863, "completions/min_length": 43.0, "epoch": 8.889054355919583, "grad_norm": 0.0022621046405291215, "kl": 0.283203125, "learning_rate": 3.0722464975222296e-08, "loss": 0.0002829260774888098, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5965, "train_speed(iter/s)": 0.027002 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.75, "completions/mean_length": 81.83333587646484, "completions/min_length": 32.0, "epoch": 8.89054355919583, "grad_norm": 0.0025928925315010904, "kl": 0.29541015625, "learning_rate": 3.0640896247176254e-08, "loss": 0.0002952963695861399, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5966, "train_speed(iter/s)": 0.027004 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 260.75, "completions/mean_length": 103.21875190734863, "completions/min_length": 42.5, "epoch": 8.892032762472077, "grad_norm": 0.0025709704534063113, "kl": 0.248779296875, "learning_rate": 3.0559432523468096e-08, "loss": 0.0002486030280124396, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5967, "train_speed(iter/s)": 0.027004 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.75, "completions/mean_length": 93.90625190734863, "completions/min_length": 38.75, "epoch": 8.893521965748324, "grad_norm": 0.0021442673034905895, "kl": 0.25244140625, "learning_rate": 3.0478073822322756e-08, "loss": 0.0002527580363675952, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5968, "train_speed(iter/s)": 0.027005 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 247.0, "completions/mean_length": 105.92708587646484, "completions/min_length": 47.25, "epoch": 8.895011169024572, "grad_norm": 0.0030877543440377474, "kl": 0.23291015625, "learning_rate": 3.0396820161941615e-08, "loss": 0.00023284384224098176, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5969, "train_speed(iter/s)": 0.027005 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.0, "completions/mean_length": 94.07291793823242, "completions/min_length": 46.25, "epoch": 8.89650037230082, "grad_norm": 0.002812948508148037, "kl": 0.28369140625, "learning_rate": 3.031567156050269e-08, "loss": 0.0002840376109816134, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5970, "train_speed(iter/s)": 0.027006 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.75, "completions/mean_length": 81.17708587646484, "completions/min_length": 37.75, "epoch": 8.897989575577066, "grad_norm": 0.002526547230182273, "kl": 0.30029296875, "learning_rate": 3.0234628036160445e-08, "loss": 0.0003002002486027777, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5971, "train_speed(iter/s)": 0.027007 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.25, "completions/mean_length": 98.3125, "completions/min_length": 29.0, "epoch": 8.899478778853313, "grad_norm": 0.00235784064235758, "kl": 0.271240234375, "learning_rate": 3.015368960704584e-08, "loss": 0.000271308672381565, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5972, "train_speed(iter/s)": 0.027007 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 262.5, "completions/mean_length": 102.14583587646484, "completions/min_length": 36.75, "epoch": 8.900967982129561, "grad_norm": 0.0035083273108188345, "kl": 0.26123046875, "learning_rate": 3.0072856291266216e-08, "loss": 0.0002604750916361809, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5973, "train_speed(iter/s)": 0.027006 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 214.0, "completions/mean_length": 93.30208587646484, "completions/min_length": 38.0, "epoch": 8.902457185405808, "grad_norm": 3.3185438401502845, "kl": 0.274169921875, "learning_rate": 2.9992128106905434e-08, "loss": -0.007325687911361456, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.24763019382953644, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5974, "train_speed(iter/s)": 0.027006 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 202.5, "completions/mean_length": 90.92708396911621, "completions/min_length": 34.0, "epoch": 8.903946388682055, "grad_norm": 0.002877948009332388, "kl": 0.255615234375, "learning_rate": 2.9911505072024166e-08, "loss": 0.0002554651000536978, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5975, "train_speed(iter/s)": 0.027006 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.5, "completions/mean_length": 90.62500190734863, "completions/min_length": 37.0, "epoch": 8.905435591958302, "grad_norm": 0.0026115511825206923, "kl": 0.261474609375, "learning_rate": 2.983098720465893e-08, "loss": 0.0002616821730043739, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5976, "train_speed(iter/s)": 0.027005 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.0, "completions/mean_length": 89.34375190734863, "completions/min_length": 35.0, "epoch": 8.90692479523455, "grad_norm": 0.0023729250140789677, "kl": 0.26123046875, "learning_rate": 2.9750574522823314e-08, "loss": 0.00026095303473994136, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5977, "train_speed(iter/s)": 0.027005 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 173.25, "completions/mean_length": 87.80208778381348, "completions/min_length": 41.25, "epoch": 8.908413998510797, "grad_norm": 0.0041299412880871425, "kl": 0.287109375, "learning_rate": 2.9670267044507035e-08, "loss": 0.00028706775628961623, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5978, "train_speed(iter/s)": 0.027003 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.0, "completions/mean_length": 92.73958587646484, "completions/min_length": 38.25, "epoch": 8.909903201787044, "grad_norm": 0.8740123282828767, "kl": 0.265625, "learning_rate": 2.9590064787676282e-08, "loss": -0.004217604175209999, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833507180214, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5979, "train_speed(iter/s)": 0.027002 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 280.75, "completions/mean_length": 100.81250190734863, "completions/min_length": 45.0, "epoch": 8.91139240506329, "grad_norm": 0.0023458633990056724, "kl": 0.272705078125, "learning_rate": 2.9509967770274034e-08, "loss": 0.0002728466934058815, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5980, "train_speed(iter/s)": 0.026999 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 240.5, "completions/mean_length": 98.69791984558105, "completions/min_length": 39.25, "epoch": 8.91288160833954, "grad_norm": 0.0020966351689562946, "kl": 0.26025390625, "learning_rate": 2.9429976010219236e-08, "loss": 0.0002605331246741116, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5981, "train_speed(iter/s)": 0.027 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 244.5, "completions/mean_length": 97.30208587646484, "completions/min_length": 40.5, "epoch": 8.914370811615786, "grad_norm": 0.003439185100462142, "kl": 0.2626953125, "learning_rate": 2.935008952540763e-08, "loss": 0.0002621005696710199, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5982, "train_speed(iter/s)": 0.027 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.0, "completions/mean_length": 86.89583587646484, "completions/min_length": 36.75, "epoch": 8.915860014892033, "grad_norm": 0.0026000251186902705, "kl": 0.285400390625, "learning_rate": 2.9270308333711303e-08, "loss": 0.000285490823443979, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5983, "train_speed(iter/s)": 0.027002 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 181.5, "completions/mean_length": 82.57291984558105, "completions/min_length": 38.5, "epoch": 8.91734921816828, "grad_norm": 0.0024958696643850946, "kl": 0.31201171875, "learning_rate": 2.9190632452978703e-08, "loss": 0.00031165097607299685, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5984, "train_speed(iter/s)": 0.027001 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.75, "completions/mean_length": 96.73958587646484, "completions/min_length": 46.0, "epoch": 8.918838421444526, "grad_norm": 0.0022730556343736543, "kl": 0.259521484375, "learning_rate": 2.9111061901035018e-08, "loss": 0.00025964865926653147, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5985, "train_speed(iter/s)": 0.027001 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 185.5, "completions/mean_length": 89.53125381469727, "completions/min_length": 37.5, "epoch": 8.920327624720775, "grad_norm": 0.00265005480573746, "kl": 0.27978515625, "learning_rate": 2.9031596695681337e-08, "loss": 0.0002802210219670087, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5986, "train_speed(iter/s)": 0.027001 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 194.75, "completions/mean_length": 84.64583587646484, "completions/min_length": 38.0, "epoch": 8.921816827997022, "grad_norm": 0.07323781257245059, "kl": 0.301025390625, "learning_rate": 2.895223685469578e-08, "loss": 0.00030080898432061076, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5987, "train_speed(iter/s)": 0.027002 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.75, "completions/mean_length": 91.71875381469727, "completions/min_length": 45.5, "epoch": 8.923306031273269, "grad_norm": 0.0022419028507969846, "kl": 0.275390625, "learning_rate": 2.8872982395832523e-08, "loss": 0.00027501751901581883, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5988, "train_speed(iter/s)": 0.027003 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 225.0, "completions/mean_length": 94.95833587646484, "completions/min_length": 35.75, "epoch": 8.924795234549515, "grad_norm": 0.002652390294622663, "kl": 0.275146484375, "learning_rate": 2.8793833336822227e-08, "loss": 0.00027514685643836856, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5989, "train_speed(iter/s)": 0.027003 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 231.75, "completions/mean_length": 97.81250381469727, "completions/min_length": 39.25, "epoch": 8.926284437825764, "grad_norm": 0.5230555448761223, "kl": 0.26611328125, "learning_rate": 2.8714789695372054e-08, "loss": 0.016914520412683487, "memory(GiB)": 112.53, "reward": 1.84375, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.84375, "rewards/CineAccuracyORM/std": 0.12363383919000626, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5990, "train_speed(iter/s)": 0.027002 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.25, "completions/mean_length": 90.66666984558105, "completions/min_length": 48.75, "epoch": 8.92777364110201, "grad_norm": 1.0219346196891108, "kl": 0.26318359375, "learning_rate": 2.863585148916542e-08, "loss": -0.0012244064128026366, "memory(GiB)": 112.53, "reward": 1.822916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5991, "train_speed(iter/s)": 0.027004 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 229.75, "completions/mean_length": 100.69791793823242, "completions/min_length": 51.75, "epoch": 8.929262844378258, "grad_norm": 0.0024761777332894318, "kl": 0.276123046875, "learning_rate": 2.8557018735862415e-08, "loss": 0.0002764019591268152, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5992, "train_speed(iter/s)": 0.027004 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 316.0, "completions/mean_length": 96.34375190734863, "completions/min_length": 37.5, "epoch": 8.930752047654504, "grad_norm": 0.0029738111971049936, "kl": 0.2802734375, "learning_rate": 2.8478291453099323e-08, "loss": 0.0002798687492031604, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5993, "train_speed(iter/s)": 0.027001 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.25, "completions/mean_length": 95.20833587646484, "completions/min_length": 38.0, "epoch": 8.932241250930751, "grad_norm": 0.0023729007097056153, "kl": 0.259521484375, "learning_rate": 2.839966965848889e-08, "loss": 0.00025954688317142427, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5994, "train_speed(iter/s)": 0.027002 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 312.25, "completions/mean_length": 110.13542175292969, "completions/min_length": 46.25, "epoch": 8.933730454207, "grad_norm": 0.002278784201248857, "kl": 0.25, "learning_rate": 2.8321153369620265e-08, "loss": 0.000250685348873958, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5995, "train_speed(iter/s)": 0.027003 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 181.5, "completions/mean_length": 83.80208778381348, "completions/min_length": 44.5, "epoch": 8.935219657483247, "grad_norm": 0.0023695322856078912, "kl": 0.28955078125, "learning_rate": 2.8242742604058956e-08, "loss": 0.00028990127611905336, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5996, "train_speed(iter/s)": 0.027004 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 202.0, "completions/mean_length": 92.37500190734863, "completions/min_length": 47.0, "epoch": 8.936708860759493, "grad_norm": 0.002548762975607768, "kl": 0.267578125, "learning_rate": 2.8164437379347038e-08, "loss": 0.00026754988357424736, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5997, "train_speed(iter/s)": 0.027004 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 235.0, "completions/mean_length": 94.95833587646484, "completions/min_length": 43.75, "epoch": 8.93819806403574, "grad_norm": 0.004445243103460245, "kl": 0.2587890625, "learning_rate": 2.8086237713002782e-08, "loss": 0.0002587816270533949, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5998, "train_speed(iter/s)": 0.027003 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.5, "completions/mean_length": 101.29166984558105, "completions/min_length": 44.75, "epoch": 8.939687267311989, "grad_norm": 0.002338574425309862, "kl": 0.26123046875, "learning_rate": 2.8008143622520907e-08, "loss": 0.0002605138288345188, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 5999, "train_speed(iter/s)": 0.027001 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 260.75, "completions/mean_length": 107.56250190734863, "completions/min_length": 39.0, "epoch": 8.941176470588236, "grad_norm": 0.002562812112950482, "kl": 0.251708984375, "learning_rate": 2.7930155125372446e-08, "loss": 0.00025154062313959, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6000, "train_speed(iter/s)": 0.027001 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 195.25, "completions/mean_length": 96.73958396911621, "completions/min_length": 32.5, "epoch": 8.942665673864482, "grad_norm": 0.0028793065229563044, "kl": 0.27001953125, "learning_rate": 2.785227223900494e-08, "loss": 0.0002700768818613142, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6001, "train_speed(iter/s)": 0.026994 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.5, "completions/mean_length": 100.15625381469727, "completions/min_length": 44.25, "epoch": 8.944154877140729, "grad_norm": 0.6584714847891174, "kl": 0.28759765625, "learning_rate": 2.777449498084211e-08, "loss": -0.0056435726583004, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6002, "train_speed(iter/s)": 0.026994 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 261.25, "completions/mean_length": 99.25000381469727, "completions/min_length": 40.75, "epoch": 8.945644080416978, "grad_norm": 0.002554959038805313, "kl": 0.26953125, "learning_rate": 2.7696823368284327e-08, "loss": 0.0002697138988878578, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6003, "train_speed(iter/s)": 0.026992 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 248.25, "completions/mean_length": 103.56250381469727, "completions/min_length": 33.0, "epoch": 8.947133283693224, "grad_norm": 0.0022130646261920303, "kl": 0.2490234375, "learning_rate": 2.7619257418708185e-08, "loss": 0.00024896994000300765, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6004, "train_speed(iter/s)": 0.026991 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 294.75, "completions/mean_length": 108.93750381469727, "completions/min_length": 43.75, "epoch": 8.948622486969471, "grad_norm": 0.0023335414432232447, "kl": 0.241455078125, "learning_rate": 2.7541797149466528e-08, "loss": 0.00024114683037623763, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6005, "train_speed(iter/s)": 0.026991 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 253.5, "completions/mean_length": 99.52083587646484, "completions/min_length": 41.5, "epoch": 8.950111690245718, "grad_norm": 1.7659647087517534, "kl": 0.26220703125, "learning_rate": 2.7464442577888657e-08, "loss": 0.004501555114984512, "memory(GiB)": 112.53, "reward": 1.697916716337204, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.697916679084301, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6006, "train_speed(iter/s)": 0.02699 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.75, "completions/mean_length": 91.92708396911621, "completions/min_length": 39.0, "epoch": 8.951600893521967, "grad_norm": 0.0030074031776066534, "kl": 0.2822265625, "learning_rate": 2.7387193721280233e-08, "loss": 0.0002822081441991031, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6007, "train_speed(iter/s)": 0.026989 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 246.25, "completions/mean_length": 97.17708587646484, "completions/min_length": 35.25, "epoch": 8.953090096798213, "grad_norm": 0.6801493825093994, "kl": 0.253662109375, "learning_rate": 2.731005059692332e-08, "loss": -0.005563266109675169, "memory(GiB)": 112.53, "reward": 1.7812500298023224, "reward_std": 0.06200198456645012, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.24807433784008026, "rewards/Format/mean": 0.9895833432674408, "rewards/Format/std": 0.05103103443980217, "step": 6008, "train_speed(iter/s)": 0.026989 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.25, "completions/mean_length": 98.52083587646484, "completions/min_length": 35.75, "epoch": 8.95457930007446, "grad_norm": 0.0026239163324039646, "kl": 0.2744140625, "learning_rate": 2.7233013222076217e-08, "loss": 0.0002742609358392656, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6009, "train_speed(iter/s)": 0.02699 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 292.25, "completions/mean_length": 96.56250381469727, "completions/min_length": 40.25, "epoch": 8.956068503350707, "grad_norm": 0.0024035601378411555, "kl": 0.279296875, "learning_rate": 2.7156081613973646e-08, "loss": 0.00027974226395599544, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6010, "train_speed(iter/s)": 0.026988 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 220.0, "completions/mean_length": 94.40625190734863, "completions/min_length": 41.5, "epoch": 8.957557706626954, "grad_norm": 0.002294345934599456, "kl": 0.266845703125, "learning_rate": 2.707925578982656e-08, "loss": 0.0002670878020580858, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6011, "train_speed(iter/s)": 0.026987 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 195.25, "completions/mean_length": 85.60416984558105, "completions/min_length": 33.75, "epoch": 8.959046909903202, "grad_norm": 0.004628211644432151, "kl": 0.2900390625, "learning_rate": 2.700253576682232e-08, "loss": 0.0002904208959080279, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6012, "train_speed(iter/s)": 0.026988 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.5, "completions/mean_length": 98.47916984558105, "completions/min_length": 43.75, "epoch": 8.96053611317945, "grad_norm": 0.7656337701237756, "kl": 0.243408203125, "learning_rate": 2.6925921562124866e-08, "loss": 0.0045584118925035, "memory(GiB)": 112.53, "reward": 1.8437500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6013, "train_speed(iter/s)": 0.026988 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.75, "completions/mean_length": 98.23958396911621, "completions/min_length": 29.0, "epoch": 8.962025316455696, "grad_norm": 0.002493488608455209, "kl": 0.25927734375, "learning_rate": 2.6849413192873816e-08, "loss": 0.00025949650444090366, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6014, "train_speed(iter/s)": 0.026986 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 247.25, "completions/mean_length": 94.54166793823242, "completions/min_length": 50.25, "epoch": 8.963514519731943, "grad_norm": 0.0023063027526151964, "kl": 0.281494140625, "learning_rate": 2.6773010676185814e-08, "loss": 0.00028175057377666235, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6015, "train_speed(iter/s)": 0.026984 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 271.5, "completions/mean_length": 105.52083587646484, "completions/min_length": 50.75, "epoch": 8.965003723008191, "grad_norm": 0.0023539919100868337, "kl": 0.246826171875, "learning_rate": 2.669671402915341e-08, "loss": 0.0002459831302985549, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6016, "train_speed(iter/s)": 0.026983 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 229.5, "completions/mean_length": 104.12500381469727, "completions/min_length": 48.75, "epoch": 8.966492926284438, "grad_norm": 0.002357321570841061, "kl": 0.243896484375, "learning_rate": 2.662052326884551e-08, "loss": 0.00024382675474043936, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6017, "train_speed(iter/s)": 0.026984 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.75, "completions/mean_length": 95.46875381469727, "completions/min_length": 37.25, "epoch": 8.967982129560685, "grad_norm": 0.002574419533355203, "kl": 0.26904296875, "learning_rate": 2.6544438412307634e-08, "loss": 0.0002693866263143718, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6018, "train_speed(iter/s)": 0.026981 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.25, "completions/mean_length": 91.59375381469727, "completions/min_length": 40.5, "epoch": 8.969471332836932, "grad_norm": 0.0027072597419332354, "kl": 0.30517578125, "learning_rate": 2.6468459476561068e-08, "loss": 0.00030455851810984313, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6019, "train_speed(iter/s)": 0.026981 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 241.25, "completions/mean_length": 107.66666793823242, "completions/min_length": 46.0, "epoch": 8.970960536113179, "grad_norm": 0.003006334382053092, "kl": 0.228759765625, "learning_rate": 2.6392586478603983e-08, "loss": 0.000228467135457322, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6020, "train_speed(iter/s)": 0.026979 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.0, "completions/mean_length": 90.44791984558105, "completions/min_length": 33.75, "epoch": 8.972449739389427, "grad_norm": 0.002426244465920987, "kl": 0.26953125, "learning_rate": 2.631681943541042e-08, "loss": 0.00026953971246257424, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6021, "train_speed(iter/s)": 0.026978 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.75, "completions/mean_length": 95.66666984558105, "completions/min_length": 38.25, "epoch": 8.973938942665674, "grad_norm": 0.0021014369932124714, "kl": 0.273681640625, "learning_rate": 2.6241158363930817e-08, "loss": 0.00027391850017011166, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6022, "train_speed(iter/s)": 0.026976 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 235.5, "completions/mean_length": 107.03125381469727, "completions/min_length": 42.0, "epoch": 8.97542814594192, "grad_norm": 0.0022219315058672577, "kl": 0.265380859375, "learning_rate": 2.6165603281092185e-08, "loss": 0.00026482396060600877, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6023, "train_speed(iter/s)": 0.026974 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.0, "completions/mean_length": 93.70833587646484, "completions/min_length": 38.25, "epoch": 8.976917349218168, "grad_norm": 0.002437618489527985, "kl": 0.26513671875, "learning_rate": 2.6090154203797398e-08, "loss": 0.00026537157827988267, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6024, "train_speed(iter/s)": 0.026976 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 208.0, "completions/mean_length": 89.85417175292969, "completions/min_length": 38.75, "epoch": 8.978406552494416, "grad_norm": 0.0021291800146893106, "kl": 0.2880859375, "learning_rate": 2.6014811148925952e-08, "loss": 0.0002879006206057966, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6025, "train_speed(iter/s)": 0.026975 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 298.25, "completions/mean_length": 103.04166793823242, "completions/min_length": 44.5, "epoch": 8.979895755770663, "grad_norm": 0.0022161645248035843, "kl": 0.24658203125, "learning_rate": 2.5939574133333308e-08, "loss": 0.00024656218010932207, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6026, "train_speed(iter/s)": 0.026973 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 254.0, "completions/mean_length": 102.13541793823242, "completions/min_length": 43.0, "epoch": 8.98138495904691, "grad_norm": 0.0026038414192590255, "kl": 0.2578125, "learning_rate": 2.5864443173851502e-08, "loss": 0.0002576524275355041, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6027, "train_speed(iter/s)": 0.026972 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 232.25, "completions/mean_length": 103.65625381469727, "completions/min_length": 37.75, "epoch": 8.982874162323156, "grad_norm": 0.0028671539757544272, "kl": 0.25927734375, "learning_rate": 2.5789418287288755e-08, "loss": 0.00025942022330127656, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6028, "train_speed(iter/s)": 0.02697 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 202.75, "completions/mean_length": 88.11458587646484, "completions/min_length": 41.0, "epoch": 8.984363365599405, "grad_norm": 0.09765292952506034, "kl": 0.310302734375, "learning_rate": 2.571449949042942e-08, "loss": 0.0003108623786829412, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6029, "train_speed(iter/s)": 0.026968 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 186.25, "completions/mean_length": 89.17708587646484, "completions/min_length": 40.75, "epoch": 8.985852568875652, "grad_norm": 0.002619251842887043, "kl": 0.294921875, "learning_rate": 2.563968680003442e-08, "loss": 0.0002946885069832206, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6030, "train_speed(iter/s)": 0.026969 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 257.75, "completions/mean_length": 103.31250190734863, "completions/min_length": 37.75, "epoch": 8.987341772151899, "grad_norm": 0.0026652158749789767, "kl": 0.25439453125, "learning_rate": 2.5564980232840428e-08, "loss": 0.00025420516612939537, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6031, "train_speed(iter/s)": 0.026968 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.0, "completions/mean_length": 92.26041984558105, "completions/min_length": 45.0, "epoch": 8.988830975428145, "grad_norm": 0.002559764605125416, "kl": 0.279296875, "learning_rate": 2.5490379805560957e-08, "loss": 0.00027974913245998323, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6032, "train_speed(iter/s)": 0.02697 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 264.75, "completions/mean_length": 103.37500190734863, "completions/min_length": 44.0, "epoch": 8.990320178704394, "grad_norm": 0.0025154150979192383, "kl": 0.27001953125, "learning_rate": 2.5415885534885438e-08, "loss": 0.0002694012364372611, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6033, "train_speed(iter/s)": 0.02697 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 234.5, "completions/mean_length": 97.10416793823242, "completions/min_length": 40.0, "epoch": 8.99180938198064, "grad_norm": 0.0023209318679534665, "kl": 0.26025390625, "learning_rate": 2.5341497437479486e-08, "loss": 0.00026044732658192515, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6034, "train_speed(iter/s)": 0.02697 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 274.25, "completions/mean_length": 97.83333587646484, "completions/min_length": 40.75, "epoch": 8.993298585256888, "grad_norm": 0.0023955313607825377, "kl": 0.27587890625, "learning_rate": 2.526721552998534e-08, "loss": 0.0002757749753072858, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6035, "train_speed(iter/s)": 0.026969 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.5, "completions/mean_length": 94.34375, "completions/min_length": 44.0, "epoch": 8.994787788533134, "grad_norm": 0.029569159174541962, "kl": 0.28662109375, "learning_rate": 2.519303982902099e-08, "loss": 0.000286113063339144, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6036, "train_speed(iter/s)": 0.026968 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.5, "completions/mean_length": 94.60416793823242, "completions/min_length": 35.0, "epoch": 8.996276991809381, "grad_norm": 0.002759399316648703, "kl": 0.263427734375, "learning_rate": 2.5118970351181103e-08, "loss": 0.00026381638599559665, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6037, "train_speed(iter/s)": 0.026968 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.0, "completions/mean_length": 90.05208587646484, "completions/min_length": 38.5, "epoch": 8.99776619508563, "grad_norm": 0.002310753865821335, "kl": 0.257568359375, "learning_rate": 2.5045007113036308e-08, "loss": 0.00025747675681486726, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6038, "train_speed(iter/s)": 0.026968 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 255.5, "completions/mean_length": 110.89583587646484, "completions/min_length": 33.5, "epoch": 8.999255398361877, "grad_norm": 0.002572768761401025, "kl": 0.24853515625, "learning_rate": 2.49711501311336e-08, "loss": 0.0002477962989360094, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6039, "train_speed(iter/s)": 0.026968 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 234.0, "completions/mean_length": 102.01042175292969, "completions/min_length": 34.25, "epoch": 9.001489203276247, "grad_norm": 0.0023931128861325412, "kl": 0.2607421875, "learning_rate": 2.4897399421996145e-08, "loss": 0.0002607168862596154, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6040, "train_speed(iter/s)": 0.026967 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 274.0, "completions/mean_length": 102.75000381469727, "completions/min_length": 45.5, "epoch": 9.002978406552494, "grad_norm": 0.0023919947344026933, "kl": 0.26220703125, "learning_rate": 2.482375500212325e-08, "loss": 0.0002620273153297603, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6041, "train_speed(iter/s)": 0.026965 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 291.75, "completions/mean_length": 100.14583396911621, "completions/min_length": 34.5, "epoch": 9.004467609828742, "grad_norm": 0.00258932878721292, "kl": 0.2548828125, "learning_rate": 2.4750216887990672e-08, "loss": 0.0002546596806496382, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6042, "train_speed(iter/s)": 0.026965 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 191.75, "completions/mean_length": 90.21875190734863, "completions/min_length": 45.5, "epoch": 9.005956813104989, "grad_norm": 1.8382427042134912, "kl": 0.2763671875, "learning_rate": 2.4676785096050146e-08, "loss": -0.0038875415921211243, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6043, "train_speed(iter/s)": 0.026965 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 259.0, "completions/mean_length": 94.47916984558105, "completions/min_length": 42.5, "epoch": 9.007446016381236, "grad_norm": 0.002577130512026801, "kl": 0.279296875, "learning_rate": 2.4603459642729864e-08, "loss": 0.0002802601957228035, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6044, "train_speed(iter/s)": 0.026963 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 257.25, "completions/mean_length": 90.17708778381348, "completions/min_length": 39.0, "epoch": 9.008935219657483, "grad_norm": 0.0024628703359331297, "kl": 0.299072265625, "learning_rate": 2.4530240544433867e-08, "loss": 0.00029868714045733213, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6045, "train_speed(iter/s)": 0.026961 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.75, "completions/mean_length": 99.43750381469727, "completions/min_length": 40.0, "epoch": 9.010424422933731, "grad_norm": 0.9577223673080411, "kl": 0.26953125, "learning_rate": 2.4457127817542833e-08, "loss": -0.002110494300723076, "memory(GiB)": 112.53, "reward": 1.6250000298023224, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.6250000149011612, "rewards/CineAccuracyORM/std": 0.33594508469104767, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6046, "train_speed(iter/s)": 0.026961 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 248.5, "completions/mean_length": 94.86458587646484, "completions/min_length": 48.75, "epoch": 9.011913626209978, "grad_norm": 0.0025584176836605577, "kl": 0.271728515625, "learning_rate": 2.43841214784134e-08, "loss": 0.0002715190057642758, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6047, "train_speed(iter/s)": 0.026961 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.75, "completions/mean_length": 92.98958587646484, "completions/min_length": 40.5, "epoch": 9.013402829486225, "grad_norm": 0.0035520315345512147, "kl": 0.24365234375, "learning_rate": 2.431122154337839e-08, "loss": 0.00024343618133571, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6048, "train_speed(iter/s)": 0.026962 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 244.75, "completions/mean_length": 96.16666793823242, "completions/min_length": 38.0, "epoch": 9.014892032762472, "grad_norm": 0.002424437992102338, "kl": 0.295654296875, "learning_rate": 2.423842802874687e-08, "loss": 0.00029515172354876995, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6049, "train_speed(iter/s)": 0.026961 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 214.75, "completions/mean_length": 88.30208587646484, "completions/min_length": 39.75, "epoch": 9.01638123603872, "grad_norm": 0.0027084087819823997, "kl": 0.280517578125, "learning_rate": 2.4165740950804038e-08, "loss": 0.0002805717522278428, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6050, "train_speed(iter/s)": 0.02696 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.0, "completions/mean_length": 97.61458587646484, "completions/min_length": 38.5, "epoch": 9.017870439314967, "grad_norm": 0.0025531604295748384, "kl": 0.2490234375, "learning_rate": 2.4093160325811545e-08, "loss": 0.00024940213188529015, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6051, "train_speed(iter/s)": 0.02696 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.75, "completions/mean_length": 96.86458778381348, "completions/min_length": 33.5, "epoch": 9.019359642591214, "grad_norm": 0.002531662518397822, "kl": 0.277099609375, "learning_rate": 2.4020686170006798e-08, "loss": 0.00027722836239263415, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6052, "train_speed(iter/s)": 0.026962 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 249.75, "completions/mean_length": 99.46875190734863, "completions/min_length": 37.5, "epoch": 9.02084884586746, "grad_norm": 0.0021830975784005815, "kl": 0.25927734375, "learning_rate": 2.394831849960377e-08, "loss": 0.0002596962731331587, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6053, "train_speed(iter/s)": 0.026962 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 270.0, "completions/mean_length": 98.65625190734863, "completions/min_length": 29.25, "epoch": 9.022338049143707, "grad_norm": 0.0021838310447180463, "kl": 0.279296875, "learning_rate": 2.3876057330792343e-08, "loss": 0.00027946726186200976, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6054, "train_speed(iter/s)": 0.026961 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 248.0, "completions/mean_length": 86.59375381469727, "completions/min_length": 38.5, "epoch": 9.023827252419956, "grad_norm": 0.002656313937800401, "kl": 0.287353515625, "learning_rate": 2.3803902679738697e-08, "loss": 0.00028677599038928747, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6055, "train_speed(iter/s)": 0.02696 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 289.5, "completions/mean_length": 90.27083587646484, "completions/min_length": 45.25, "epoch": 9.025316455696203, "grad_norm": 0.007948353378060925, "kl": 0.27099609375, "learning_rate": 2.3731854562585308e-08, "loss": 0.00027092653908766806, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6056, "train_speed(iter/s)": 0.026959 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 229.0, "completions/mean_length": 91.70833587646484, "completions/min_length": 37.25, "epoch": 9.02680565897245, "grad_norm": 0.002250943288270423, "kl": 0.26611328125, "learning_rate": 2.3659912995450447e-08, "loss": 0.0002651865070220083, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6057, "train_speed(iter/s)": 0.026959 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.25, "completions/mean_length": 95.36458587646484, "completions/min_length": 44.0, "epoch": 9.028294862248696, "grad_norm": 0.002503943530708531, "kl": 0.2705078125, "learning_rate": 2.358807799442897e-08, "loss": 0.0002701027551665902, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6058, "train_speed(iter/s)": 0.026959 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.75, "completions/mean_length": 91.22916984558105, "completions/min_length": 38.5, "epoch": 9.029784065524945, "grad_norm": 0.002655618998710493, "kl": 0.2880859375, "learning_rate": 2.3516349575591567e-08, "loss": 0.00028823327738791704, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6059, "train_speed(iter/s)": 0.026957 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.25, "completions/mean_length": 82.07291793823242, "completions/min_length": 32.5, "epoch": 9.031273268801192, "grad_norm": 1.6377142110999723, "kl": 0.2919921875, "learning_rate": 2.344472775498524e-08, "loss": -0.011935114860534668, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6060, "train_speed(iter/s)": 0.026959 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 253.5, "completions/mean_length": 95.05208396911621, "completions/min_length": 36.25, "epoch": 9.032762472077438, "grad_norm": 0.002710012718533502, "kl": 0.2734375, "learning_rate": 2.337321254863328e-08, "loss": 0.0002729077823460102, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6061, "train_speed(iter/s)": 0.026959 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 261.25, "completions/mean_length": 99.96875381469727, "completions/min_length": 44.25, "epoch": 9.034251675353685, "grad_norm": 0.0027362119832210247, "kl": 0.267578125, "learning_rate": 2.3301803972534728e-08, "loss": 0.00026791394338943064, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6062, "train_speed(iter/s)": 0.026959 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.0, "completions/mean_length": 96.61458587646484, "completions/min_length": 34.25, "epoch": 9.035740878629934, "grad_norm": 0.002333165127334595, "kl": 0.26220703125, "learning_rate": 2.323050204266519e-08, "loss": 0.0002618877333588898, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6063, "train_speed(iter/s)": 0.026959 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.75, "completions/mean_length": 94.85416984558105, "completions/min_length": 37.5, "epoch": 9.03723008190618, "grad_norm": 0.00240049850506587, "kl": 0.27392578125, "learning_rate": 2.3159306774976074e-08, "loss": 0.0002734082518145442, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6064, "train_speed(iter/s)": 0.026959 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.5, "completions/mean_length": 89.83333778381348, "completions/min_length": 39.75, "epoch": 9.038719285182427, "grad_norm": 0.002456274538582753, "kl": 0.30029296875, "learning_rate": 2.308821818539519e-08, "loss": 0.0003004029858857393, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6065, "train_speed(iter/s)": 0.026958 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.0, "completions/mean_length": 90.53125190734863, "completions/min_length": 34.75, "epoch": 9.040208488458674, "grad_norm": 0.0025878793863437994, "kl": 0.29150390625, "learning_rate": 2.301723628982638e-08, "loss": 0.00029163144063204527, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6066, "train_speed(iter/s)": 0.026955 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.0, "completions/mean_length": 87.26041793823242, "completions/min_length": 32.25, "epoch": 9.041697691734921, "grad_norm": 0.002620272893545886, "kl": 0.2890625, "learning_rate": 2.2946361104149492e-08, "loss": 0.0002891277545131743, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6067, "train_speed(iter/s)": 0.026956 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 262.0, "completions/mean_length": 102.72916984558105, "completions/min_length": 47.75, "epoch": 9.04318689501117, "grad_norm": 0.002278404946822386, "kl": 0.260498046875, "learning_rate": 2.2875592644220842e-08, "loss": 0.0002602063468657434, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6068, "train_speed(iter/s)": 0.026956 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 196.0, "completions/mean_length": 96.04166984558105, "completions/min_length": 37.75, "epoch": 9.044676098287416, "grad_norm": 0.002315587719881354, "kl": 0.2802734375, "learning_rate": 2.2804930925872377e-08, "loss": 0.00028039549943059683, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6069, "train_speed(iter/s)": 0.026956 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 204.5, "completions/mean_length": 89.86458587646484, "completions/min_length": 38.5, "epoch": 9.046165301563663, "grad_norm": 0.0024895862023082845, "kl": 0.2900390625, "learning_rate": 2.273437596491262e-08, "loss": 0.0002902429550886154, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6070, "train_speed(iter/s)": 0.026957 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 244.75, "completions/mean_length": 101.79167175292969, "completions/min_length": 45.25, "epoch": 9.04765450483991, "grad_norm": 0.002457869749338657, "kl": 0.27490234375, "learning_rate": 2.2663927777125945e-08, "loss": 0.0002749702543951571, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6071, "train_speed(iter/s)": 0.026955 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.5, "completions/mean_length": 91.84375190734863, "completions/min_length": 43.25, "epoch": 9.049143708116159, "grad_norm": 0.002411592384239559, "kl": 0.28173828125, "learning_rate": 2.259358637827291e-08, "loss": 0.00028182746609672904, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6072, "train_speed(iter/s)": 0.026953 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.5, "completions/mean_length": 92.75000190734863, "completions/min_length": 38.25, "epoch": 9.050632911392405, "grad_norm": 0.0025054629366988643, "kl": 0.270263671875, "learning_rate": 2.252335178409037e-08, "loss": 0.0002704397193156183, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6073, "train_speed(iter/s)": 0.026955 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 232.0, "completions/mean_length": 102.02083778381348, "completions/min_length": 40.5, "epoch": 9.052122114668652, "grad_norm": 0.006731844221514556, "kl": 0.26025390625, "learning_rate": 2.2453224010290816e-08, "loss": 0.00026050128508359194, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6074, "train_speed(iter/s)": 0.026954 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 267.25, "completions/mean_length": 106.75000381469727, "completions/min_length": 41.0, "epoch": 9.053611317944899, "grad_norm": 0.0022092900823891992, "kl": 0.236083984375, "learning_rate": 2.238320307256336e-08, "loss": 0.00023581829736940563, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6075, "train_speed(iter/s)": 0.026955 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 240.0, "completions/mean_length": 99.15625381469727, "completions/min_length": 46.5, "epoch": 9.055100521221148, "grad_norm": 0.0023531353362915472, "kl": 0.26611328125, "learning_rate": 2.231328898657292e-08, "loss": 0.0002657688455656171, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6076, "train_speed(iter/s)": 0.026955 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 269.0, "completions/mean_length": 102.82291984558105, "completions/min_length": 35.5, "epoch": 9.056589724497394, "grad_norm": 0.002191283432685374, "kl": 0.248779296875, "learning_rate": 2.224348176796048e-08, "loss": 0.0002485496806912124, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6077, "train_speed(iter/s)": 0.026954 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 199.5, "completions/mean_length": 98.30208587646484, "completions/min_length": 37.25, "epoch": 9.058078927773641, "grad_norm": 0.002501020791910996, "kl": 0.267333984375, "learning_rate": 2.217378143234333e-08, "loss": 0.00026716708089224994, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6078, "train_speed(iter/s)": 0.026954 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.5, "completions/mean_length": 94.42708587646484, "completions/min_length": 31.75, "epoch": 9.059568131049888, "grad_norm": 1.7336333616100277, "kl": 0.28857421875, "learning_rate": 2.210418799531466e-08, "loss": -0.0017164468299597502, "memory(GiB)": 112.53, "reward": 1.7187500596046448, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7187500149011612, "rewards/CineAccuracyORM/std": 0.3680160492658615, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6079, "train_speed(iter/s)": 0.026956 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 232.75, "completions/mean_length": 90.47916793823242, "completions/min_length": 40.5, "epoch": 9.061057334326135, "grad_norm": 0.0024286057318018465, "kl": 0.282470703125, "learning_rate": 2.2034701472443852e-08, "loss": 0.0002823553513735533, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6080, "train_speed(iter/s)": 0.026955 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 247.25, "completions/mean_length": 101.65625381469727, "completions/min_length": 36.5, "epoch": 9.062546537602383, "grad_norm": 0.0023335463992081914, "kl": 0.255615234375, "learning_rate": 2.19653218792763e-08, "loss": 0.0002554943785071373, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6081, "train_speed(iter/s)": 0.026955 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.75, "completions/mean_length": 83.16666984558105, "completions/min_length": 36.5, "epoch": 9.06403574087863, "grad_norm": 0.0025100115916328325, "kl": 0.2802734375, "learning_rate": 2.189604923133359e-08, "loss": 0.0002804224786814302, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6082, "train_speed(iter/s)": 0.026957 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 240.0, "completions/mean_length": 100.48958587646484, "completions/min_length": 41.0, "epoch": 9.065524944154877, "grad_norm": 0.0026863259895872566, "kl": 0.266357421875, "learning_rate": 2.182688354411316e-08, "loss": 0.0002664586063474417, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6083, "train_speed(iter/s)": 0.026958 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 246.75, "completions/mean_length": 105.59375190734863, "completions/min_length": 41.5, "epoch": 9.067014147431124, "grad_norm": 0.0024125027786054843, "kl": 0.245361328125, "learning_rate": 2.175782483308869e-08, "loss": 0.0002452486369293183, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6084, "train_speed(iter/s)": 0.026955 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.25, "completions/mean_length": 94.75000381469727, "completions/min_length": 34.25, "epoch": 9.068503350707372, "grad_norm": 0.0026158991063277614, "kl": 0.252197265625, "learning_rate": 2.1688873113709983e-08, "loss": 0.0002522994764149189, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6085, "train_speed(iter/s)": 0.026955 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 261.25, "completions/mean_length": 99.93750190734863, "completions/min_length": 41.75, "epoch": 9.069992553983619, "grad_norm": 0.0022537429731891586, "kl": 0.2724609375, "learning_rate": 2.1620028401402812e-08, "loss": 0.00027220917399972677, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6086, "train_speed(iter/s)": 0.026954 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 165.75, "completions/mean_length": 86.73958587646484, "completions/min_length": 38.5, "epoch": 9.071481757259866, "grad_norm": 0.0024885156685934074, "kl": 0.27392578125, "learning_rate": 2.1551290711568916e-08, "loss": 0.0002742116048466414, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6087, "train_speed(iter/s)": 0.026954 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 233.5, "completions/mean_length": 96.58333587646484, "completions/min_length": 41.25, "epoch": 9.072970960536113, "grad_norm": 0.0031144605562527785, "kl": 0.30126953125, "learning_rate": 2.1482660059586265e-08, "loss": 0.00030114370747469366, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6088, "train_speed(iter/s)": 0.026951 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 223.0, "completions/mean_length": 99.26041984558105, "completions/min_length": 41.75, "epoch": 9.074460163812361, "grad_norm": 0.9699755970529128, "kl": 0.28564453125, "learning_rate": 2.1414136460808806e-08, "loss": -0.00024646264500916004, "memory(GiB)": 112.53, "reward": 1.7395833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6089, "train_speed(iter/s)": 0.026948 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.25, "completions/mean_length": 86.18750190734863, "completions/min_length": 36.5, "epoch": 9.075949367088608, "grad_norm": 0.002744908818306821, "kl": 0.3017578125, "learning_rate": 2.134571993056644e-08, "loss": 0.00030148201039992273, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6090, "train_speed(iter/s)": 0.026947 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 256.75, "completions/mean_length": 107.83333587646484, "completions/min_length": 43.75, "epoch": 9.077438570364855, "grad_norm": 1.3734659006029213, "kl": 0.246337890625, "learning_rate": 2.1277410484165314e-08, "loss": -0.0013819861924275756, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6091, "train_speed(iter/s)": 0.026947 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 251.0, "completions/mean_length": 102.38542175292969, "completions/min_length": 40.0, "epoch": 9.078927773641102, "grad_norm": 0.002562290226151795, "kl": 0.244873046875, "learning_rate": 2.120920813688759e-08, "loss": 0.0002452965418342501, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6092, "train_speed(iter/s)": 0.026948 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 234.25, "completions/mean_length": 94.76041984558105, "completions/min_length": 42.0, "epoch": 9.080416976917348, "grad_norm": 0.002301368767483327, "kl": 0.25830078125, "learning_rate": 2.114111290399123e-08, "loss": 0.0002584023168310523, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6093, "train_speed(iter/s)": 0.026949 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.75, "completions/mean_length": 94.38541793823242, "completions/min_length": 38.0, "epoch": 9.081906180193597, "grad_norm": 0.003783316472762446, "kl": 0.27783203125, "learning_rate": 2.107312480071055e-08, "loss": 0.00027754795155487955, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6094, "train_speed(iter/s)": 0.026951 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 256.75, "completions/mean_length": 94.50000381469727, "completions/min_length": 43.75, "epoch": 9.083395383469844, "grad_norm": 0.6323741815103938, "kl": 0.3125, "learning_rate": 2.100524384225555e-08, "loss": 0.013935393653810024, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6095, "train_speed(iter/s)": 0.026951 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.0, "completions/mean_length": 88.90625190734863, "completions/min_length": 36.5, "epoch": 9.08488458674609, "grad_norm": 0.002295036873903456, "kl": 0.275146484375, "learning_rate": 2.0937470043812688e-08, "loss": 0.00027491868240758777, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6096, "train_speed(iter/s)": 0.02695 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.5, "completions/mean_length": 101.92708587646484, "completions/min_length": 42.5, "epoch": 9.086373790022337, "grad_norm": 0.7989256448986389, "kl": 0.24267578125, "learning_rate": 2.086980342054412e-08, "loss": 0.002940781181678176, "memory(GiB)": 112.53, "reward": 1.90625, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.12363383919000626, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6097, "train_speed(iter/s)": 0.02695 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 248.5, "completions/mean_length": 100.07291793823242, "completions/min_length": 43.25, "epoch": 9.087862993298586, "grad_norm": 0.002384714717150888, "kl": 0.2587890625, "learning_rate": 2.0802243987588064e-08, "loss": 0.0002585484180599451, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6098, "train_speed(iter/s)": 0.026951 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.0, "completions/mean_length": 89.21875190734863, "completions/min_length": 37.5, "epoch": 9.089352196574833, "grad_norm": 0.0022222878144663935, "kl": 0.3056640625, "learning_rate": 2.073479176005899e-08, "loss": 0.0003049930674023926, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6099, "train_speed(iter/s)": 0.026949 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 265.0, "completions/mean_length": 97.22916984558105, "completions/min_length": 42.75, "epoch": 9.09084139985108, "grad_norm": 0.002540021606240053, "kl": 0.28515625, "learning_rate": 2.066744675304699e-08, "loss": 0.000285362038994208, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6100, "train_speed(iter/s)": 0.026949 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 263.0, "completions/mean_length": 90.35416984558105, "completions/min_length": 37.0, "epoch": 9.092330603127326, "grad_norm": 0.0027635898788988763, "kl": 0.2978515625, "learning_rate": 2.0600208981618627e-08, "loss": 0.00029776027076877654, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6101, "train_speed(iter/s)": 0.026948 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 241.75, "completions/mean_length": 102.82291984558105, "completions/min_length": 39.0, "epoch": 9.093819806403575, "grad_norm": 0.7345432727642198, "kl": 0.2431640625, "learning_rate": 2.0533078460815977e-08, "loss": 0.018439605832099915, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833507180214, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6102, "train_speed(iter/s)": 0.02695 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 279.75, "completions/mean_length": 103.60416793823242, "completions/min_length": 38.25, "epoch": 9.095309009679822, "grad_norm": 0.002324785248666524, "kl": 0.273681640625, "learning_rate": 2.0466055205657585e-08, "loss": 0.0002733796718530357, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6103, "train_speed(iter/s)": 0.026949 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 234.5, "completions/mean_length": 97.90625190734863, "completions/min_length": 44.75, "epoch": 9.096798212956068, "grad_norm": 0.003841317071110761, "kl": 0.27197265625, "learning_rate": 2.039913923113773e-08, "loss": 0.00027242989744991064, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6104, "train_speed(iter/s)": 0.026948 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 248.0, "completions/mean_length": 101.27083778381348, "completions/min_length": 41.5, "epoch": 9.098287416232315, "grad_norm": 0.002415670764979984, "kl": 0.29150390625, "learning_rate": 2.033233055222672e-08, "loss": 0.0002914188662543893, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6105, "train_speed(iter/s)": 0.026945 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 243.75, "completions/mean_length": 91.89583587646484, "completions/min_length": 35.75, "epoch": 9.099776619508562, "grad_norm": 0.002067846564006664, "kl": 0.2900390625, "learning_rate": 2.0265629183871035e-08, "loss": 0.0002903534914366901, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6106, "train_speed(iter/s)": 0.026944 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.0, "completions/mean_length": 102.41666984558105, "completions/min_length": 42.25, "epoch": 9.10126582278481, "grad_norm": 0.0028238159615075775, "kl": 0.266845703125, "learning_rate": 2.019903514099275e-08, "loss": 0.00026680686278268695, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6107, "train_speed(iter/s)": 0.026942 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.5, "completions/mean_length": 95.53125190734863, "completions/min_length": 39.25, "epoch": 9.102755026061057, "grad_norm": 0.0024263575653687367, "kl": 0.2744140625, "learning_rate": 2.013254843849044e-08, "loss": 0.0002738353796303272, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6108, "train_speed(iter/s)": 0.02694 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.5, "completions/mean_length": 94.73958587646484, "completions/min_length": 42.0, "epoch": 9.104244229337304, "grad_norm": 0.0027914759291702046, "kl": 0.27001953125, "learning_rate": 2.0066169091238316e-08, "loss": 0.0002702863421291113, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6109, "train_speed(iter/s)": 0.026942 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.0, "completions/mean_length": 96.21875381469727, "completions/min_length": 47.0, "epoch": 9.105733432613551, "grad_norm": 2.0390633488210543, "kl": 0.2705078125, "learning_rate": 1.9999897114086617e-08, "loss": 0.0005424560513347387, "memory(GiB)": 112.53, "reward": 1.9375, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.11058146506547928, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6110, "train_speed(iter/s)": 0.026941 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.0, "completions/mean_length": 97.03125190734863, "completions/min_length": 36.25, "epoch": 9.1072226358898, "grad_norm": 0.0025590221197193752, "kl": 0.2548828125, "learning_rate": 1.9933732521861812e-08, "loss": 0.00025521061616018414, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6111, "train_speed(iter/s)": 0.026942 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 255.25, "completions/mean_length": 91.20833778381348, "completions/min_length": 41.75, "epoch": 9.108711839166046, "grad_norm": 0.0025573410649414857, "kl": 0.265869140625, "learning_rate": 1.9867675329365842e-08, "loss": 0.0002661034814082086, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6112, "train_speed(iter/s)": 0.026943 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 248.5, "completions/mean_length": 96.32291984558105, "completions/min_length": 42.0, "epoch": 9.110201042442293, "grad_norm": 0.002304597559208178, "kl": 0.258056640625, "learning_rate": 1.9801725551377215e-08, "loss": 0.0002581820881459862, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6113, "train_speed(iter/s)": 0.026943 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.75, "completions/mean_length": 95.62500190734863, "completions/min_length": 45.5, "epoch": 9.11169024571854, "grad_norm": 0.0027787466343702594, "kl": 0.277099609375, "learning_rate": 1.9735883202649904e-08, "loss": 0.0002770263818092644, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6114, "train_speed(iter/s)": 0.026943 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 249.25, "completions/mean_length": 98.03125190734863, "completions/min_length": 34.5, "epoch": 9.113179448994789, "grad_norm": 0.002947299494971859, "kl": 0.25927734375, "learning_rate": 1.967014829791419e-08, "loss": 0.0002590307267382741, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6115, "train_speed(iter/s)": 0.026944 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 238.5, "completions/mean_length": 95.45833587646484, "completions/min_length": 43.5, "epoch": 9.114668652271035, "grad_norm": 0.0022880764646356277, "kl": 0.283447265625, "learning_rate": 1.9604520851876194e-08, "loss": 0.00028337707044556737, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6116, "train_speed(iter/s)": 0.026946 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 234.0, "completions/mean_length": 96.02083587646484, "completions/min_length": 41.5, "epoch": 9.116157855547282, "grad_norm": 0.002914051842602142, "kl": 0.27880859375, "learning_rate": 1.9539000879217837e-08, "loss": 0.0002790822181850672, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6117, "train_speed(iter/s)": 0.026944 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 236.75, "completions/mean_length": 101.63542175292969, "completions/min_length": 38.5, "epoch": 9.117647058823529, "grad_norm": 0.0023856423991369473, "kl": 0.269775390625, "learning_rate": 1.947358839459734e-08, "loss": 0.00026913953479379416, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6118, "train_speed(iter/s)": 0.026942 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 233.0, "completions/mean_length": 88.77083587646484, "completions/min_length": 34.5, "epoch": 9.119136262099776, "grad_norm": 0.002295646300728322, "kl": 0.28466796875, "learning_rate": 1.9408283412648606e-08, "loss": 0.00028429977828636765, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6119, "train_speed(iter/s)": 0.026943 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.75, "completions/mean_length": 102.82291984558105, "completions/min_length": 45.75, "epoch": 9.120625465376024, "grad_norm": 0.00232765313108439, "kl": 0.24658203125, "learning_rate": 1.934308594798162e-08, "loss": 0.00024675618624314666, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6120, "train_speed(iter/s)": 0.026944 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 201.0, "completions/mean_length": 86.04167175292969, "completions/min_length": 31.75, "epoch": 9.122114668652271, "grad_norm": 1.073571911183591, "kl": 0.31103515625, "learning_rate": 1.9277996015182206e-08, "loss": -0.020782997831702232, "memory(GiB)": 112.53, "reward": 1.6458334028720856, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.645833358168602, "rewards/CineAccuracyORM/std": 0.4870600998401642, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6121, "train_speed(iter/s)": 0.026944 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 202.25, "completions/mean_length": 87.84375190734863, "completions/min_length": 37.0, "epoch": 9.123603871928518, "grad_norm": 0.002640309457420099, "kl": 0.288330078125, "learning_rate": 1.921301362881217e-08, "loss": 0.00028881721664220095, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6122, "train_speed(iter/s)": 0.026944 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.25, "completions/mean_length": 100.09375381469727, "completions/min_length": 44.75, "epoch": 9.125093075204765, "grad_norm": 0.0027268222699973646, "kl": 0.255615234375, "learning_rate": 1.914813880340932e-08, "loss": 0.0002556979889050126, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6123, "train_speed(iter/s)": 0.026945 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 240.25, "completions/mean_length": 97.84375381469727, "completions/min_length": 41.25, "epoch": 9.126582278481013, "grad_norm": 2.16493885520247, "kl": 0.270751953125, "learning_rate": 1.908337155348738e-08, "loss": 0.007174676284193993, "memory(GiB)": 112.53, "reward": 1.8958333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6124, "train_speed(iter/s)": 0.026945 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 229.25, "completions/mean_length": 96.78125190734863, "completions/min_length": 38.25, "epoch": 9.12807148175726, "grad_norm": 0.0022982696549358928, "kl": 0.28857421875, "learning_rate": 1.901871189353599e-08, "loss": 0.0002879674138966948, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6125, "train_speed(iter/s)": 0.026945 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.25, "completions/mean_length": 92.50000381469727, "completions/min_length": 46.5, "epoch": 9.129560685033507, "grad_norm": 0.003219247274768207, "kl": 0.28466796875, "learning_rate": 1.895415983802068e-08, "loss": 0.0002845008857548237, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6126, "train_speed(iter/s)": 0.026943 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.25, "completions/mean_length": 94.48958587646484, "completions/min_length": 32.5, "epoch": 9.131049888309754, "grad_norm": 1.0878801645858391, "kl": 0.278076171875, "learning_rate": 1.8889715401382898e-08, "loss": -0.006975123193114996, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.822916679084301, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6127, "train_speed(iter/s)": 0.026943 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 250.5, "completions/mean_length": 98.39583778381348, "completions/min_length": 43.5, "epoch": 9.132539091586002, "grad_norm": 0.002744625880008218, "kl": 0.27490234375, "learning_rate": 1.8825378598040065e-08, "loss": 0.00027413011412136257, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6128, "train_speed(iter/s)": 0.026941 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 267.25, "completions/mean_length": 92.26041984558105, "completions/min_length": 39.5, "epoch": 9.134028294862249, "grad_norm": 0.0024291357584466833, "kl": 0.306640625, "learning_rate": 1.876114944238566e-08, "loss": 0.00030700035858899355, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6129, "train_speed(iter/s)": 0.026941 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.75, "completions/mean_length": 94.08333587646484, "completions/min_length": 46.0, "epoch": 9.135517498138496, "grad_norm": 0.0027659242859650684, "kl": 0.248779296875, "learning_rate": 1.8697027948788803e-08, "loss": 0.00024899127311073244, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6130, "train_speed(iter/s)": 0.026943 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.0, "completions/mean_length": 95.36458587646484, "completions/min_length": 35.25, "epoch": 9.137006701414743, "grad_norm": 0.00215091056698328, "kl": 0.26953125, "learning_rate": 1.8633014131594738e-08, "loss": 0.0002693000715225935, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6131, "train_speed(iter/s)": 0.026942 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 294.5, "completions/mean_length": 106.45833778381348, "completions/min_length": 46.75, "epoch": 9.13849590469099, "grad_norm": 0.0027300067471179167, "kl": 0.278564453125, "learning_rate": 1.8569108005124457e-08, "loss": 0.00027893471997231245, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6132, "train_speed(iter/s)": 0.02694 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 174.5, "completions/mean_length": 84.21875381469727, "completions/min_length": 33.25, "epoch": 9.139985107967238, "grad_norm": 0.003751795805706042, "kl": 0.2744140625, "learning_rate": 1.850530958367491e-08, "loss": 0.0002749170525930822, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6133, "train_speed(iter/s)": 0.026941 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 263.75, "completions/mean_length": 107.17708778381348, "completions/min_length": 44.0, "epoch": 9.141474311243485, "grad_norm": 0.0024096027659260676, "kl": 0.222412109375, "learning_rate": 1.8441618881519184e-08, "loss": 0.00022202414402272552, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6134, "train_speed(iter/s)": 0.026943 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 225.5, "completions/mean_length": 99.66666793823242, "completions/min_length": 32.75, "epoch": 9.142963514519732, "grad_norm": 0.002556307476932476, "kl": 0.26806640625, "learning_rate": 1.837803591290593e-08, "loss": 0.000268039817456156, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6135, "train_speed(iter/s)": 0.026943 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 237.25, "completions/mean_length": 95.83333587646484, "completions/min_length": 41.0, "epoch": 9.144452717795978, "grad_norm": 0.7694227342839066, "kl": 0.27099609375, "learning_rate": 1.8314560692059833e-08, "loss": -0.006781230680644512, "memory(GiB)": 112.53, "reward": 1.9791666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.9791666716337204, "rewards/CineAccuracyORM/std": 0.07058246433734894, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6136, "train_speed(iter/s)": 0.026944 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.75, "completions/mean_length": 96.10417175292969, "completions/min_length": 44.75, "epoch": 9.145941921072227, "grad_norm": 0.002326360229804034, "kl": 0.27294921875, "learning_rate": 1.8251193233181528e-08, "loss": 0.00027301604859530926, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6137, "train_speed(iter/s)": 0.026943 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.0, "completions/mean_length": 90.34375381469727, "completions/min_length": 44.25, "epoch": 9.147431124348474, "grad_norm": 0.0025776028514064688, "kl": 0.277099609375, "learning_rate": 1.8187933550447453e-08, "loss": 0.0002767594996839762, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6138, "train_speed(iter/s)": 0.026942 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.0, "completions/mean_length": 102.52083778381348, "completions/min_length": 46.25, "epoch": 9.14892032762472, "grad_norm": 0.19569872650807224, "kl": 0.317626953125, "learning_rate": 1.812478165801007e-08, "loss": 0.00031681492691859603, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6139, "train_speed(iter/s)": 0.02694 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.0, "completions/mean_length": 104.84375381469727, "completions/min_length": 47.0, "epoch": 9.150409530900967, "grad_norm": 0.002687874396354173, "kl": 0.234375, "learning_rate": 1.8061737569997404e-08, "loss": 0.00023405547835864127, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6140, "train_speed(iter/s)": 0.026941 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.0, "completions/mean_length": 90.25000190734863, "completions/min_length": 41.25, "epoch": 9.151898734177216, "grad_norm": 0.0027439028196556, "kl": 0.279296875, "learning_rate": 1.7998801300513844e-08, "loss": 0.000279408588539809, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6141, "train_speed(iter/s)": 0.026941 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.25, "completions/mean_length": 89.60416984558105, "completions/min_length": 38.5, "epoch": 9.153387937453463, "grad_norm": 0.00336583506847936, "kl": 0.2841796875, "learning_rate": 1.7935972863639237e-08, "loss": 0.0002843558322638273, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6142, "train_speed(iter/s)": 0.02694 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 258.5, "completions/mean_length": 100.70833587646484, "completions/min_length": 33.75, "epoch": 9.15487714072971, "grad_norm": 0.0024322606697761906, "kl": 0.248779296875, "learning_rate": 1.7873252273429506e-08, "loss": 0.0002484330325387418, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6143, "train_speed(iter/s)": 0.026937 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 201.75, "completions/mean_length": 103.03125190734863, "completions/min_length": 48.75, "epoch": 9.156366344005956, "grad_norm": 1.0382615586856832, "kl": 0.24658203125, "learning_rate": 1.7810639543916484e-08, "loss": -0.015160429291427135, "memory(GiB)": 112.53, "reward": 1.8437500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6144, "train_speed(iter/s)": 0.026937 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 223.25, "completions/mean_length": 86.43750381469727, "completions/min_length": 30.75, "epoch": 9.157855547282203, "grad_norm": 0.005938853986897059, "kl": 0.29345703125, "learning_rate": 1.7748134689107685e-08, "loss": 0.00029324390925467014, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6145, "train_speed(iter/s)": 0.026936 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 285.0, "completions/mean_length": 91.97916793823242, "completions/min_length": 38.75, "epoch": 9.159344750558452, "grad_norm": 0.002707772383414763, "kl": 0.2880859375, "learning_rate": 1.768573772298665e-08, "loss": 0.0002876462822314352, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6146, "train_speed(iter/s)": 0.026935 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 241.5, "completions/mean_length": 94.84375190734863, "completions/min_length": 37.0, "epoch": 9.160833953834699, "grad_norm": 2.11100004241051, "kl": 0.301025390625, "learning_rate": 1.762344865951282e-08, "loss": -0.0018807532032951713, "memory(GiB)": 112.53, "reward": 1.9270833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9270833432674408, "rewards/CineAccuracyORM/std": 0.11607640981674194, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6147, "train_speed(iter/s)": 0.026935 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 229.5, "completions/mean_length": 92.67708587646484, "completions/min_length": 36.0, "epoch": 9.162323157110945, "grad_norm": 0.0031607577370144275, "kl": 0.26611328125, "learning_rate": 1.7561267512621213e-08, "loss": 0.0002658420999068767, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6148, "train_speed(iter/s)": 0.026936 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.25, "completions/mean_length": 92.16666793823242, "completions/min_length": 37.0, "epoch": 9.163812360387192, "grad_norm": 0.0029867053027705513, "kl": 0.258056640625, "learning_rate": 1.7499194296223207e-08, "loss": 0.0002578949788585305, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6149, "train_speed(iter/s)": 0.026937 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 196.75, "completions/mean_length": 89.60416984558105, "completions/min_length": 36.25, "epoch": 9.16530156366344, "grad_norm": 0.0025184012548072413, "kl": 0.263671875, "learning_rate": 1.7437229024205412e-08, "loss": 0.0002634504053276032, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6150, "train_speed(iter/s)": 0.026938 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.75, "completions/mean_length": 94.45833587646484, "completions/min_length": 38.5, "epoch": 9.166790766939688, "grad_norm": 0.0023062506140293427, "kl": 0.273681640625, "learning_rate": 1.7375371710430853e-08, "loss": 0.000273527461104095, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6151, "train_speed(iter/s)": 0.026937 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.5, "completions/mean_length": 95.39583778381348, "completions/min_length": 45.75, "epoch": 9.168279970215934, "grad_norm": 0.0026029775583767283, "kl": 0.274169921875, "learning_rate": 1.7313622368738013e-08, "loss": 0.0002736508031375706, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6152, "train_speed(iter/s)": 0.026938 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 240.5, "completions/mean_length": 106.54166984558105, "completions/min_length": 42.25, "epoch": 9.169769173492181, "grad_norm": 0.002154653050158011, "kl": 0.26123046875, "learning_rate": 1.725198101294145e-08, "loss": 0.0002608334762044251, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6153, "train_speed(iter/s)": 0.026936 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 232.0, "completions/mean_length": 96.06250381469727, "completions/min_length": 34.0, "epoch": 9.17125837676843, "grad_norm": 0.002374141978149278, "kl": 0.28955078125, "learning_rate": 1.7190447656831418e-08, "loss": 0.00028944440418854356, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6154, "train_speed(iter/s)": 0.026935 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.25, "completions/mean_length": 100.68750381469727, "completions/min_length": 47.25, "epoch": 9.172747580044676, "grad_norm": 0.673382720559069, "kl": 0.2724609375, "learning_rate": 1.7129022314174014e-08, "loss": -0.0008258262532763183, "memory(GiB)": 112.53, "reward": 1.791666716337204, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7916666865348816, "rewards/CineAccuracyORM/std": 0.33594508469104767, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6155, "train_speed(iter/s)": 0.026935 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.25, "completions/mean_length": 104.71875190734863, "completions/min_length": 41.25, "epoch": 9.174236783320923, "grad_norm": 0.0023854536564061105, "kl": 0.23876953125, "learning_rate": 1.7067704998711408e-08, "loss": 0.0002386090054642409, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6156, "train_speed(iter/s)": 0.026936 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.0, "completions/mean_length": 91.36458587646484, "completions/min_length": 42.75, "epoch": 9.17572598659717, "grad_norm": 0.0026997566050820836, "kl": 0.26318359375, "learning_rate": 1.7006495724161295e-08, "loss": 0.00026333931600674987, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6157, "train_speed(iter/s)": 0.026936 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.25, "completions/mean_length": 94.25000190734863, "completions/min_length": 31.5, "epoch": 9.177215189873417, "grad_norm": 0.003755118610510079, "kl": 0.283935546875, "learning_rate": 1.6945394504217336e-08, "loss": 0.000284003559499979, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6158, "train_speed(iter/s)": 0.026937 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 233.0, "completions/mean_length": 95.89583396911621, "completions/min_length": 39.75, "epoch": 9.178704393149665, "grad_norm": 0.0021185798127012594, "kl": 0.265869140625, "learning_rate": 1.688440135254904e-08, "loss": 0.00026612411602400243, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6159, "train_speed(iter/s)": 0.026935 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 243.25, "completions/mean_length": 102.53125190734863, "completions/min_length": 37.75, "epoch": 9.180193596425912, "grad_norm": 0.0022998761612366488, "kl": 0.2607421875, "learning_rate": 1.6823516282801653e-08, "loss": 0.0002607927890494466, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6160, "train_speed(iter/s)": 0.026935 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 325.0, "completions/mean_length": 107.94791793823242, "completions/min_length": 36.5, "epoch": 9.181682799702159, "grad_norm": 0.002100605050142218, "kl": 0.247314453125, "learning_rate": 1.676273930859634e-08, "loss": 0.0002472896594554186, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6161, "train_speed(iter/s)": 0.026934 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 184.0, "completions/mean_length": 87.70833587646484, "completions/min_length": 38.0, "epoch": 9.183172002978406, "grad_norm": 0.0026785467657714584, "kl": 0.28515625, "learning_rate": 1.6702070443530058e-08, "loss": 0.0002851994941011071, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6162, "train_speed(iter/s)": 0.026934 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 231.0, "completions/mean_length": 96.89583587646484, "completions/min_length": 47.0, "epoch": 9.184661206254654, "grad_norm": 0.0023874936183614142, "kl": 0.2783203125, "learning_rate": 1.6641509701175505e-08, "loss": 0.00027883698930963874, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6163, "train_speed(iter/s)": 0.026934 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.75, "completions/mean_length": 100.12500190734863, "completions/min_length": 38.25, "epoch": 9.186150409530901, "grad_norm": 0.0023816920780099695, "kl": 0.262451171875, "learning_rate": 1.6581057095081284e-08, "loss": 0.0002625212073326111, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6164, "train_speed(iter/s)": 0.026936 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.25, "completions/mean_length": 95.87500190734863, "completions/min_length": 34.0, "epoch": 9.187639612807148, "grad_norm": 0.002312591030291681, "kl": 0.258544921875, "learning_rate": 1.652071263877175e-08, "loss": 0.00025887531228363514, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6165, "train_speed(iter/s)": 0.026936 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 196.5, "completions/mean_length": 88.39583778381348, "completions/min_length": 43.25, "epoch": 9.189128816083395, "grad_norm": 0.0370171014388505, "kl": 0.2978515625, "learning_rate": 1.6460476345746987e-08, "loss": 0.00029738410376012325, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6166, "train_speed(iter/s)": 0.026937 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 267.0, "completions/mean_length": 111.16666793823242, "completions/min_length": 33.25, "epoch": 9.190618019359643, "grad_norm": 0.002177608314870379, "kl": 0.23876953125, "learning_rate": 1.640034822948311e-08, "loss": 0.00023923353001009673, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6167, "train_speed(iter/s)": 0.026937 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 262.5, "completions/mean_length": 103.01041793823242, "completions/min_length": 40.75, "epoch": 9.19210722263589, "grad_norm": 0.002525138508967938, "kl": 0.254150390625, "learning_rate": 1.634032830343185e-08, "loss": 0.0002543655864428729, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6168, "train_speed(iter/s)": 0.026937 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 286.75, "completions/mean_length": 98.03125190734863, "completions/min_length": 35.75, "epoch": 9.193596425912137, "grad_norm": 0.002348155440153192, "kl": 0.2890625, "learning_rate": 1.628041658102075e-08, "loss": 0.0002889643074013293, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6169, "train_speed(iter/s)": 0.026934 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.0, "completions/mean_length": 88.25000190734863, "completions/min_length": 40.0, "epoch": 9.195085629188384, "grad_norm": 0.002380657475980693, "kl": 0.2646484375, "learning_rate": 1.62206130756532e-08, "loss": 0.0002644808846525848, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6170, "train_speed(iter/s)": 0.026932 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 247.75, "completions/mean_length": 106.51041793823242, "completions/min_length": 51.5, "epoch": 9.19657483246463, "grad_norm": 0.002992098942743176, "kl": 0.234619140625, "learning_rate": 1.616091780070833e-08, "loss": 0.0002350951253902167, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6171, "train_speed(iter/s)": 0.026931 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 257.75, "completions/mean_length": 104.13541984558105, "completions/min_length": 36.75, "epoch": 9.19806403574088, "grad_norm": 0.002556338304669488, "kl": 0.262939453125, "learning_rate": 1.6101330769541065e-08, "loss": 0.0002628295624163002, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6172, "train_speed(iter/s)": 0.02693 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 241.0, "completions/mean_length": 94.82291984558105, "completions/min_length": 39.25, "epoch": 9.199553239017126, "grad_norm": 0.002593851358434826, "kl": 0.25341796875, "learning_rate": 1.604185199548225e-08, "loss": 0.00025316118262708187, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6173, "train_speed(iter/s)": 0.02693 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.0, "completions/mean_length": 97.36458587646484, "completions/min_length": 30.0, "epoch": 9.201042442293373, "grad_norm": 0.0023436134655754766, "kl": 0.28125, "learning_rate": 1.598248149183823e-08, "loss": 0.00028125123935751617, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6174, "train_speed(iter/s)": 0.026931 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 246.75, "completions/mean_length": 95.85416984558105, "completions/min_length": 42.5, "epoch": 9.20253164556962, "grad_norm": 0.0023771963963018926, "kl": 0.272705078125, "learning_rate": 1.5923219271891385e-08, "loss": 0.000273163546808064, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6175, "train_speed(iter/s)": 0.026931 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 284.25, "completions/mean_length": 93.10416984558105, "completions/min_length": 25.0, "epoch": 9.204020848845868, "grad_norm": 0.0027865995662901383, "kl": 0.2666015625, "learning_rate": 1.586406534889967e-08, "loss": 0.0002670982212293893, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6176, "train_speed(iter/s)": 0.026929 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.75, "completions/mean_length": 98.50000190734863, "completions/min_length": 36.25, "epoch": 9.205510052122115, "grad_norm": 0.0022398755389084345, "kl": 0.246826171875, "learning_rate": 1.5805019736097103e-08, "loss": 0.0002467801095917821, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6177, "train_speed(iter/s)": 0.026928 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 233.75, "completions/mean_length": 98.30208587646484, "completions/min_length": 33.5, "epoch": 9.206999255398362, "grad_norm": 0.002528029304029343, "kl": 0.271484375, "learning_rate": 1.5746082446693066e-08, "loss": 0.0002711339620873332, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6178, "train_speed(iter/s)": 0.026929 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.0, "completions/mean_length": 95.22916793823242, "completions/min_length": 40.0, "epoch": 9.208488458674609, "grad_norm": 0.002228159281760733, "kl": 0.28857421875, "learning_rate": 1.5687253493873066e-08, "loss": 0.00028853220283053815, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6179, "train_speed(iter/s)": 0.026926 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 311.0, "completions/mean_length": 98.21875190734863, "completions/min_length": 38.5, "epoch": 9.209977661950857, "grad_norm": 1.100298480842801, "kl": 0.27880859375, "learning_rate": 1.562853289079824e-08, "loss": 0.01598496548831463, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6180, "train_speed(iter/s)": 0.026923 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 199.0, "completions/mean_length": 98.21875381469727, "completions/min_length": 39.25, "epoch": 9.211466865227104, "grad_norm": 0.0026089758310625143, "kl": 0.26171875, "learning_rate": 1.5569920650605306e-08, "loss": 0.00026171491481363773, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6181, "train_speed(iter/s)": 0.026923 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 181.5, "completions/mean_length": 86.27083587646484, "completions/min_length": 34.5, "epoch": 9.21295606850335, "grad_norm": 0.0031219486627445023, "kl": 0.28759765625, "learning_rate": 1.5511416786407162e-08, "loss": 0.00028692229534499347, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6182, "train_speed(iter/s)": 0.026923 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 277.25, "completions/mean_length": 97.86458587646484, "completions/min_length": 38.25, "epoch": 9.214445271779597, "grad_norm": 0.002625933400470058, "kl": 0.275634765625, "learning_rate": 1.5453021311292003e-08, "loss": 0.00027569974190555513, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6183, "train_speed(iter/s)": 0.026921 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 264.75, "completions/mean_length": 100.52083396911621, "completions/min_length": 41.75, "epoch": 9.215934475055844, "grad_norm": 0.004112019727367857, "kl": 0.26220703125, "learning_rate": 1.5394734238324103e-08, "loss": 0.0002623997279442847, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6184, "train_speed(iter/s)": 0.026921 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 238.0, "completions/mean_length": 88.21875190734863, "completions/min_length": 38.25, "epoch": 9.217423678332093, "grad_norm": 0.004504370893721297, "kl": 0.29638671875, "learning_rate": 1.5336555580543254e-08, "loss": 0.0002962786820717156, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6185, "train_speed(iter/s)": 0.02692 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 236.5, "completions/mean_length": 101.61458778381348, "completions/min_length": 34.5, "epoch": 9.21891288160834, "grad_norm": 0.019909076401620034, "kl": 0.253662109375, "learning_rate": 1.5278485350965152e-08, "loss": 0.0002532401995267719, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6186, "train_speed(iter/s)": 0.026917 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.0, "completions/mean_length": 89.95833396911621, "completions/min_length": 42.5, "epoch": 9.220402084884586, "grad_norm": 1.1257539179319278, "kl": 0.272705078125, "learning_rate": 1.52205235625813e-08, "loss": -0.005055367015302181, "memory(GiB)": 112.53, "reward": 1.822916716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166865348816, "rewards/CineAccuracyORM/std": 0.2918027453124523, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6187, "train_speed(iter/s)": 0.026918 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 256.25, "completions/mean_length": 104.92708587646484, "completions/min_length": 39.75, "epoch": 9.221891288160833, "grad_norm": 0.002409756253179076, "kl": 0.258056640625, "learning_rate": 1.51626702283586e-08, "loss": 0.0002582018496468663, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6188, "train_speed(iter/s)": 0.026918 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.5, "completions/mean_length": 88.50000190734863, "completions/min_length": 37.0, "epoch": 9.223380491437082, "grad_norm": 0.0025521594967493535, "kl": 0.271484375, "learning_rate": 1.510492536124014e-08, "loss": 0.00027117159334011376, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6189, "train_speed(iter/s)": 0.026917 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.75, "completions/mean_length": 102.54166793823242, "completions/min_length": 41.25, "epoch": 9.224869694713329, "grad_norm": 0.0022916901774381352, "kl": 0.239990234375, "learning_rate": 1.504728897414431e-08, "loss": 0.00023975997464731336, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6190, "train_speed(iter/s)": 0.026918 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 254.5, "completions/mean_length": 99.98958587646484, "completions/min_length": 42.0, "epoch": 9.226358897989575, "grad_norm": 0.0028315481251443547, "kl": 0.25146484375, "learning_rate": 1.498976107996558e-08, "loss": 0.0002507594181224704, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6191, "train_speed(iter/s)": 0.026919 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 236.5, "completions/mean_length": 96.50000190734863, "completions/min_length": 48.25, "epoch": 9.227848101265822, "grad_norm": 0.002382279423324046, "kl": 0.270751953125, "learning_rate": 1.493234169157398e-08, "loss": 0.0002709416439756751, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6192, "train_speed(iter/s)": 0.026919 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 185.5, "completions/mean_length": 89.25000190734863, "completions/min_length": 40.75, "epoch": 9.22933730454207, "grad_norm": 1.3059083361312334, "kl": 0.26904296875, "learning_rate": 1.4875030821815182e-08, "loss": -0.017290838062763214, "memory(GiB)": 112.53, "reward": 1.8125000596046448, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.3113541752099991, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6193, "train_speed(iter/s)": 0.026921 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 244.25, "completions/mean_length": 106.05208587646484, "completions/min_length": 44.75, "epoch": 9.230826507818318, "grad_norm": 0.0025852638266403924, "kl": 0.249267578125, "learning_rate": 1.481782848351093e-08, "loss": 0.0002496609231457114, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6194, "train_speed(iter/s)": 0.026922 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 235.75, "completions/mean_length": 96.44791793823242, "completions/min_length": 38.25, "epoch": 9.232315711094564, "grad_norm": 0.8011018477776413, "kl": 0.267578125, "learning_rate": 1.4760734689458209e-08, "loss": -0.006936111953109503, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6195, "train_speed(iter/s)": 0.02692 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.0, "completions/mean_length": 95.48958778381348, "completions/min_length": 41.75, "epoch": 9.233804914370811, "grad_norm": 0.0023324634771159816, "kl": 0.291015625, "learning_rate": 1.470374945243008e-08, "loss": 0.00029096074285916984, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6196, "train_speed(iter/s)": 0.026921 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 265.75, "completions/mean_length": 110.18750190734863, "completions/min_length": 40.0, "epoch": 9.235294117647058, "grad_norm": 0.7631364841275966, "kl": 0.236328125, "learning_rate": 1.464687278517518e-08, "loss": 0.013304062187671661, "memory(GiB)": 112.53, "reward": 1.8229166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8229166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6197, "train_speed(iter/s)": 0.026921 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.25, "completions/mean_length": 90.87500381469727, "completions/min_length": 41.75, "epoch": 9.236783320923307, "grad_norm": 0.0026360015059942008, "kl": 0.29296875, "learning_rate": 1.4590104700417826e-08, "loss": 0.0002926884626504034, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6198, "train_speed(iter/s)": 0.026921 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 223.25, "completions/mean_length": 92.63541793823242, "completions/min_length": 36.0, "epoch": 9.238272524199553, "grad_norm": 0.0023902010289807442, "kl": 0.280517578125, "learning_rate": 1.4533445210858252e-08, "loss": 0.0002805690746754408, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6199, "train_speed(iter/s)": 0.026921 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 271.5, "completions/mean_length": 101.75000190734863, "completions/min_length": 40.25, "epoch": 9.2397617274758, "grad_norm": 1.3517201641424965, "kl": 0.256591796875, "learning_rate": 1.447689432917204e-08, "loss": 0.017444124445319176, "memory(GiB)": 112.53, "reward": 1.729166716337204, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.7291666865348816, "rewards/CineAccuracyORM/std": 0.2874932996928692, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6200, "train_speed(iter/s)": 0.02692 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 223.25, "completions/mean_length": 88.36458587646484, "completions/min_length": 29.0, "epoch": 9.241250930752047, "grad_norm": 0.0028102721481560403, "kl": 0.27783203125, "learning_rate": 1.4420452068010735e-08, "loss": 0.0002773879677988589, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6201, "train_speed(iter/s)": 0.026922 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 225.5, "completions/mean_length": 91.88541984558105, "completions/min_length": 46.25, "epoch": 9.242740134028296, "grad_norm": 0.002413755822753789, "kl": 0.27685546875, "learning_rate": 1.4364118440001571e-08, "loss": 0.00027722184313461185, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6202, "train_speed(iter/s)": 0.026923 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 252.0, "completions/mean_length": 102.87500190734863, "completions/min_length": 45.25, "epoch": 9.244229337304542, "grad_norm": 0.002283067162204284, "kl": 0.251953125, "learning_rate": 1.4307893457747355e-08, "loss": 0.00025132938753813505, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6203, "train_speed(iter/s)": 0.026922 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.5, "completions/mean_length": 95.16666984558105, "completions/min_length": 44.25, "epoch": 9.24571854058079, "grad_norm": 0.003525510861488154, "kl": 0.279541015625, "learning_rate": 1.4251777133826692e-08, "loss": 0.00028012192342430353, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6204, "train_speed(iter/s)": 0.026923 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 231.75, "completions/mean_length": 99.59375190734863, "completions/min_length": 37.5, "epoch": 9.247207743857036, "grad_norm": 0.007822484993496493, "kl": 0.263427734375, "learning_rate": 1.4195769480793873e-08, "loss": 0.0002630523522384465, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6205, "train_speed(iter/s)": 0.026922 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 223.25, "completions/mean_length": 99.50000381469727, "completions/min_length": 45.75, "epoch": 9.248696947133284, "grad_norm": 0.002272930111486759, "kl": 0.247802734375, "learning_rate": 1.4139870511178765e-08, "loss": 0.000247770658461377, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6206, "train_speed(iter/s)": 0.026922 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 260.5, "completions/mean_length": 102.43750381469727, "completions/min_length": 39.25, "epoch": 9.250186150409531, "grad_norm": 0.002891665517832166, "kl": 0.2587890625, "learning_rate": 1.4084080237487138e-08, "loss": 0.00025847938377410173, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6207, "train_speed(iter/s)": 0.026922 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 191.0, "completions/mean_length": 91.11458778381348, "completions/min_length": 41.5, "epoch": 9.251675353685778, "grad_norm": 0.0094802922006947, "kl": 0.2880859375, "learning_rate": 1.4028398672200181e-08, "loss": 0.0002885197172872722, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6208, "train_speed(iter/s)": 0.026922 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 238.5, "completions/mean_length": 106.20833587646484, "completions/min_length": 44.75, "epoch": 9.253164556962025, "grad_norm": 0.0036492885066317397, "kl": 0.24365234375, "learning_rate": 1.3972825827774926e-08, "loss": 0.00024407215823885053, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6209, "train_speed(iter/s)": 0.026921 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 241.0, "completions/mean_length": 100.82291984558105, "completions/min_length": 42.0, "epoch": 9.254653760238272, "grad_norm": 0.002394609904020148, "kl": 0.2529296875, "learning_rate": 1.3917361716644094e-08, "loss": 0.00025242625270038843, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6210, "train_speed(iter/s)": 0.026922 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.75, "completions/mean_length": 84.28125381469727, "completions/min_length": 42.0, "epoch": 9.25614296351452, "grad_norm": 1.2647208952783942, "kl": 0.28466796875, "learning_rate": 1.3862006351216038e-08, "loss": 0.02096571773290634, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6211, "train_speed(iter/s)": 0.026924 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 252.25, "completions/mean_length": 105.01041793823242, "completions/min_length": 42.0, "epoch": 9.257632166790767, "grad_norm": 1.2770732425665452, "kl": 0.257080078125, "learning_rate": 1.3806759743874686e-08, "loss": -0.0033374992199242115, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.3513531759381294, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6212, "train_speed(iter/s)": 0.026925 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 204.25, "completions/mean_length": 86.28125, "completions/min_length": 35.25, "epoch": 9.259121370067014, "grad_norm": 0.0024281205360845944, "kl": 0.282958984375, "learning_rate": 1.375162190697987e-08, "loss": 0.0002832403115462512, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6213, "train_speed(iter/s)": 0.026926 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 204.25, "completions/mean_length": 96.09375190734863, "completions/min_length": 39.75, "epoch": 9.26061057334326, "grad_norm": 0.002350100413980841, "kl": 0.253173828125, "learning_rate": 1.3696592852866783e-08, "loss": 0.00025286973686888814, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6214, "train_speed(iter/s)": 0.026926 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 254.75, "completions/mean_length": 102.91666984558105, "completions/min_length": 36.75, "epoch": 9.26209977661951, "grad_norm": 0.00220263174523599, "kl": 0.264404296875, "learning_rate": 1.3641672593846632e-08, "loss": 0.00026394298765808344, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6215, "train_speed(iter/s)": 0.026927 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 273.25, "completions/mean_length": 96.62500190734863, "completions/min_length": 37.25, "epoch": 9.263588979895756, "grad_norm": 0.012852716378870557, "kl": 0.27783203125, "learning_rate": 1.358686114220592e-08, "loss": 0.00027715059695765376, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6216, "train_speed(iter/s)": 0.026925 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 251.5, "completions/mean_length": 98.32291984558105, "completions/min_length": 33.0, "epoch": 9.265078183172003, "grad_norm": 0.0024634749953765063, "kl": 0.287109375, "learning_rate": 1.3532158510207059e-08, "loss": 0.0002869919699151069, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6217, "train_speed(iter/s)": 0.026923 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 236.25, "completions/mean_length": 92.15625381469727, "completions/min_length": 43.5, "epoch": 9.26656738644825, "grad_norm": 0.0025019358577381824, "kl": 0.27978515625, "learning_rate": 1.3477564710088096e-08, "loss": 0.00027953035896643996, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6218, "train_speed(iter/s)": 0.026923 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 229.25, "completions/mean_length": 102.96875190734863, "completions/min_length": 43.75, "epoch": 9.268056589724498, "grad_norm": 0.5343798015440873, "kl": 0.277587890625, "learning_rate": 1.3423079754062539e-08, "loss": 0.0014485337305814028, "memory(GiB)": 112.53, "reward": 1.760416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166865348816, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6219, "train_speed(iter/s)": 0.026923 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.75, "completions/mean_length": 89.43750190734863, "completions/min_length": 41.0, "epoch": 9.269545793000745, "grad_norm": 0.002420640174718455, "kl": 0.2802734375, "learning_rate": 1.3368703654319912e-08, "loss": 0.0002800106885842979, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6220, "train_speed(iter/s)": 0.026924 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.75, "completions/mean_length": 89.73958587646484, "completions/min_length": 39.5, "epoch": 9.271034996276992, "grad_norm": 0.0024492613965775858, "kl": 0.265625, "learning_rate": 1.3314436423024932e-08, "loss": 0.0002652927068993449, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6221, "train_speed(iter/s)": 0.026925 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 229.25, "completions/mean_length": 94.06250381469727, "completions/min_length": 41.75, "epoch": 9.272524199553239, "grad_norm": 0.002540255612747747, "kl": 0.268798828125, "learning_rate": 1.3260278072318275e-08, "loss": 0.00026913516921922565, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6222, "train_speed(iter/s)": 0.026925 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 255.0, "completions/mean_length": 94.45833587646484, "completions/min_length": 47.0, "epoch": 9.274013402829485, "grad_norm": 0.0022345928965305127, "kl": 0.2822265625, "learning_rate": 1.3206228614316195e-08, "loss": 0.0002824922266881913, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6223, "train_speed(iter/s)": 0.026924 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 206.75, "completions/mean_length": 93.80208587646484, "completions/min_length": 37.75, "epoch": 9.275502606105734, "grad_norm": 0.002242441645907515, "kl": 0.26904296875, "learning_rate": 1.3152288061110517e-08, "loss": 0.0002687343512661755, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6224, "train_speed(iter/s)": 0.026923 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 257.25, "completions/mean_length": 94.94791793823242, "completions/min_length": 38.25, "epoch": 9.27699180938198, "grad_norm": 0.0023402219050994624, "kl": 0.25439453125, "learning_rate": 1.3098456424768756e-08, "loss": 0.00025460400502197444, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6225, "train_speed(iter/s)": 0.026922 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.0, "completions/mean_length": 100.07291984558105, "completions/min_length": 42.25, "epoch": 9.278481012658228, "grad_norm": 0.0029182802851177047, "kl": 0.269287109375, "learning_rate": 1.3044733717333945e-08, "loss": 0.0002693341230042279, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6226, "train_speed(iter/s)": 0.026921 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 176.0, "completions/mean_length": 92.06250190734863, "completions/min_length": 40.5, "epoch": 9.279970215934474, "grad_norm": 0.0031170212876247367, "kl": 0.2919921875, "learning_rate": 1.2991119950825136e-08, "loss": 0.0002914724173024297, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6227, "train_speed(iter/s)": 0.02692 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 261.25, "completions/mean_length": 97.66666793823242, "completions/min_length": 45.5, "epoch": 9.281459419210723, "grad_norm": 2.7504172020045226, "kl": 0.2548828125, "learning_rate": 1.2937615137236402e-08, "loss": 0.010745796374976635, "memory(GiB)": 112.53, "reward": 1.8437500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6228, "train_speed(iter/s)": 0.02692 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 235.5, "completions/mean_length": 109.86458587646484, "completions/min_length": 52.5, "epoch": 9.28294862248697, "grad_norm": 0.0024528723172412767, "kl": 0.2587890625, "learning_rate": 1.2884219288537889e-08, "loss": 0.0002581780427135527, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6229, "train_speed(iter/s)": 0.026919 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 252.5, "completions/mean_length": 92.95833396911621, "completions/min_length": 32.0, "epoch": 9.284437825763217, "grad_norm": 0.0025941713107545117, "kl": 0.29345703125, "learning_rate": 1.2830932416675322e-08, "loss": 0.0002930760383605957, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6230, "train_speed(iter/s)": 0.026918 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.25, "completions/mean_length": 95.04166793823242, "completions/min_length": 42.75, "epoch": 9.285927029039463, "grad_norm": 0.7741994746579051, "kl": 0.2626953125, "learning_rate": 1.277775453356983e-08, "loss": 0.011098260059952736, "memory(GiB)": 112.53, "reward": 1.7708333432674408, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333358168602, "rewards/CineAccuracyORM/std": 0.23096732050180435, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6231, "train_speed(iter/s)": 0.026918 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 233.5, "completions/mean_length": 86.29166793823242, "completions/min_length": 38.0, "epoch": 9.287416232315712, "grad_norm": 0.002605689884331389, "kl": 0.2763671875, "learning_rate": 1.272468565111845e-08, "loss": 0.00027619380853138864, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6232, "train_speed(iter/s)": 0.026918 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.0, "completions/mean_length": 90.16666984558105, "completions/min_length": 37.0, "epoch": 9.288905435591959, "grad_norm": 0.0023385141336645645, "kl": 0.27294921875, "learning_rate": 1.2671725781193466e-08, "loss": 0.0002726312377490103, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6233, "train_speed(iter/s)": 0.026918 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 234.0, "completions/mean_length": 97.52083396911621, "completions/min_length": 44.25, "epoch": 9.290394638868205, "grad_norm": 0.006150171941608634, "kl": 0.274658203125, "learning_rate": 1.261887493564312e-08, "loss": 0.00027478826814331114, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6234, "train_speed(iter/s)": 0.02692 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.5, "completions/mean_length": 92.39583587646484, "completions/min_length": 39.25, "epoch": 9.291883842144452, "grad_norm": 0.0024132964918265865, "kl": 0.27001953125, "learning_rate": 1.2566133126291123e-08, "loss": 0.0002700599143281579, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6235, "train_speed(iter/s)": 0.02692 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.0, "completions/mean_length": 86.90625381469727, "completions/min_length": 41.5, "epoch": 9.293373045420699, "grad_norm": 0.9129634140792818, "kl": 0.2900390625, "learning_rate": 1.2513500364936758e-08, "loss": 0.009408975951373577, "memory(GiB)": 112.53, "reward": 1.90625, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.12363383919000626, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6236, "train_speed(iter/s)": 0.026921 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 274.25, "completions/mean_length": 106.53125190734863, "completions/min_length": 42.75, "epoch": 9.294862248696948, "grad_norm": 0.0023671905966318922, "kl": 0.264404296875, "learning_rate": 1.2460976663354995e-08, "loss": 0.0002639286685734987, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6237, "train_speed(iter/s)": 0.02692 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.5, "completions/mean_length": 94.08333396911621, "completions/min_length": 44.25, "epoch": 9.296351451973194, "grad_norm": 1.1648126182604421, "kl": 0.2861328125, "learning_rate": 1.2408562033296266e-08, "loss": -0.012060761451721191, "memory(GiB)": 112.53, "reward": 1.6354166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.6354166716337204, "rewards/CineAccuracyORM/std": 0.20484384894371033, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6238, "train_speed(iter/s)": 0.02692 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 346.25, "completions/mean_length": 112.23958587646484, "completions/min_length": 41.5, "epoch": 9.297840655249441, "grad_norm": 0.002342514295337926, "kl": 0.2529296875, "learning_rate": 1.2356256486486804e-08, "loss": 0.0002532260259613395, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6239, "train_speed(iter/s)": 0.02692 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 266.25, "completions/mean_length": 99.50000381469727, "completions/min_length": 37.0, "epoch": 9.299329858525688, "grad_norm": 0.06743655359208693, "kl": 0.281494140625, "learning_rate": 1.230406003462825e-08, "loss": 0.00028101919451728463, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6240, "train_speed(iter/s)": 0.026919 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 214.0, "completions/mean_length": 101.77083396911621, "completions/min_length": 39.75, "epoch": 9.300819061801937, "grad_norm": 0.003144327435318453, "kl": 0.2333984375, "learning_rate": 1.2251972689398038e-08, "loss": 0.0002336655743420124, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6241, "train_speed(iter/s)": 0.026919 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.0, "completions/mean_length": 98.01041984558105, "completions/min_length": 36.25, "epoch": 9.302308265078183, "grad_norm": 0.0024222765872516645, "kl": 0.27490234375, "learning_rate": 1.2199994462448904e-08, "loss": 0.0002747838734649122, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6242, "train_speed(iter/s)": 0.026917 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.75, "completions/mean_length": 87.72916984558105, "completions/min_length": 37.0, "epoch": 9.30379746835443, "grad_norm": 0.0026578923698843807, "kl": 0.26611328125, "learning_rate": 1.2148125365409434e-08, "loss": 0.0002658147714100778, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6243, "train_speed(iter/s)": 0.026919 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 261.25, "completions/mean_length": 103.06250190734863, "completions/min_length": 40.5, "epoch": 9.305286671630677, "grad_norm": 0.0021246370921292118, "kl": 0.244873046875, "learning_rate": 1.2096365409883735e-08, "loss": 0.0002452086191624403, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6244, "train_speed(iter/s)": 0.026919 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 261.25, "completions/mean_length": 99.42708778381348, "completions/min_length": 33.75, "epoch": 9.306775874906926, "grad_norm": 0.0022945044899113933, "kl": 0.2646484375, "learning_rate": 1.2044714607451435e-08, "loss": 0.00026407965924590826, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6245, "train_speed(iter/s)": 0.026918 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 249.75, "completions/mean_length": 93.47916984558105, "completions/min_length": 37.0, "epoch": 9.308265078183172, "grad_norm": 0.002270134763210981, "kl": 0.26611328125, "learning_rate": 1.1993172969667786e-08, "loss": 0.0002661667240317911, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6246, "train_speed(iter/s)": 0.026916 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 235.0, "completions/mean_length": 103.36458587646484, "completions/min_length": 43.25, "epoch": 9.30975428145942, "grad_norm": 0.002313149683452639, "kl": 0.24755859375, "learning_rate": 1.1941740508063569e-08, "loss": 0.0002472869527991861, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6247, "train_speed(iter/s)": 0.026917 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 243.75, "completions/mean_length": 101.79166793823242, "completions/min_length": 36.25, "epoch": 9.311243484735666, "grad_norm": 1.2122532848707903, "kl": 0.265625, "learning_rate": 1.1890417234145245e-08, "loss": 0.0024210000410676003, "memory(GiB)": 112.53, "reward": 1.9895833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9895833432674408, "rewards/CineAccuracyORM/std": 0.05103103443980217, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6248, "train_speed(iter/s)": 0.026917 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.0, "completions/mean_length": 91.16666793823242, "completions/min_length": 42.5, "epoch": 9.312732688011913, "grad_norm": 0.0022104334635719793, "kl": 0.271484375, "learning_rate": 1.1839203159394684e-08, "loss": 0.00027181685436517, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6249, "train_speed(iter/s)": 0.026915 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.75, "completions/mean_length": 101.5625, "completions/min_length": 38.75, "epoch": 9.314221891288161, "grad_norm": 0.0024138942962383296, "kl": 0.267333984375, "learning_rate": 1.1788098295269499e-08, "loss": 0.0002674197021406144, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6250, "train_speed(iter/s)": 0.026917 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.75, "completions/mean_length": 89.67708587646484, "completions/min_length": 33.0, "epoch": 9.315711094564408, "grad_norm": 0.0024873599480082207, "kl": 0.283203125, "learning_rate": 1.1737102653202823e-08, "loss": 0.0002834168553818017, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6251, "train_speed(iter/s)": 0.026914 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 201.25, "completions/mean_length": 101.85416793823242, "completions/min_length": 44.0, "epoch": 9.317200297840655, "grad_norm": 0.06011637083385973, "kl": 0.308349609375, "learning_rate": 1.1686216244603308e-08, "loss": 0.0003079791786149144, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6252, "train_speed(iter/s)": 0.026914 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.25, "completions/mean_length": 84.69791984558105, "completions/min_length": 40.75, "epoch": 9.318689501116902, "grad_norm": 1.791596013280316, "kl": 0.3037109375, "learning_rate": 1.1635439080855125e-08, "loss": 0.004564226139336824, "memory(GiB)": 112.53, "reward": 1.7916666865348816, "reward_std": 0.07259188033640385, "rewards/CineAccuracyORM/mean": 0.7916666716337204, "rewards/CineAccuracyORM/std": 0.22734662145376205, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6253, "train_speed(iter/s)": 0.026914 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 208.75, "completions/mean_length": 93.37500190734863, "completions/min_length": 41.0, "epoch": 9.32017870439315, "grad_norm": 0.002350304550002218, "kl": 0.269287109375, "learning_rate": 1.1584771173318076e-08, "loss": 0.00026888420688919723, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6254, "train_speed(iter/s)": 0.026916 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 260.25, "completions/mean_length": 92.18750190734863, "completions/min_length": 37.0, "epoch": 9.321667907669397, "grad_norm": 0.002133738375904027, "kl": 0.2890625, "learning_rate": 1.1534212533327592e-08, "loss": 0.00028928095707669854, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6255, "train_speed(iter/s)": 0.026914 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 229.75, "completions/mean_length": 100.46875190734863, "completions/min_length": 50.5, "epoch": 9.323157110945644, "grad_norm": 0.003984253845008212, "kl": 0.2685546875, "learning_rate": 1.1483763172194571e-08, "loss": 0.0002691872068680823, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6256, "train_speed(iter/s)": 0.026914 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 204.75, "completions/mean_length": 93.01041984558105, "completions/min_length": 30.25, "epoch": 9.32464631422189, "grad_norm": 0.0022523406423097868, "kl": 0.26708984375, "learning_rate": 1.143342310120532e-08, "loss": 0.00026710942620411515, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6257, "train_speed(iter/s)": 0.026915 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 247.25, "completions/mean_length": 98.76041793823242, "completions/min_length": 35.75, "epoch": 9.32613551749814, "grad_norm": 0.0022798380283650106, "kl": 0.26806640625, "learning_rate": 1.1383192331622104e-08, "loss": 0.00026838501798920333, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6258, "train_speed(iter/s)": 0.026915 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 220.25, "completions/mean_length": 93.72916984558105, "completions/min_length": 43.25, "epoch": 9.327624720774386, "grad_norm": 0.004033827994427245, "kl": 0.2958984375, "learning_rate": 1.1333070874682216e-08, "loss": 0.00029544622520916164, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6259, "train_speed(iter/s)": 0.026915 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 253.75, "completions/mean_length": 102.41666984558105, "completions/min_length": 38.5, "epoch": 9.329113924050633, "grad_norm": 0.0022149515702379005, "kl": 0.249267578125, "learning_rate": 1.128305874159896e-08, "loss": 0.00024866859894245863, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6260, "train_speed(iter/s)": 0.026916 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 231.75, "completions/mean_length": 104.03125, "completions/min_length": 35.25, "epoch": 9.33060312732688, "grad_norm": 0.0029162622752035047, "kl": 0.246826171875, "learning_rate": 1.1233155943560835e-08, "loss": 0.0002461288240738213, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6261, "train_speed(iter/s)": 0.026916 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 246.0, "completions/mean_length": 103.73958587646484, "completions/min_length": 46.25, "epoch": 9.332092330603126, "grad_norm": 0.0023600962939623435, "kl": 0.252685546875, "learning_rate": 1.118336249173213e-08, "loss": 0.00025250803446397185, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6262, "train_speed(iter/s)": 0.026915 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 238.25, "completions/mean_length": 93.40625381469727, "completions/min_length": 36.75, "epoch": 9.333581533879375, "grad_norm": 0.055502088413311564, "kl": 0.265869140625, "learning_rate": 1.1133678397252433e-08, "loss": 0.0002653316769283265, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6263, "train_speed(iter/s)": 0.026917 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 208.0, "completions/mean_length": 93.27083587646484, "completions/min_length": 37.5, "epoch": 9.335070737155622, "grad_norm": 0.002474683998467094, "kl": 0.285400390625, "learning_rate": 1.1084103671237077e-08, "loss": 0.00028507396928034723, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6264, "train_speed(iter/s)": 0.026917 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 247.25, "completions/mean_length": 89.58333396911621, "completions/min_length": 47.25, "epoch": 9.336559940431869, "grad_norm": 0.003054561713224417, "kl": 0.28564453125, "learning_rate": 1.1034638324776967e-08, "loss": 0.0002848294680006802, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6265, "train_speed(iter/s)": 0.026917 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 232.5, "completions/mean_length": 91.45833587646484, "completions/min_length": 38.5, "epoch": 9.338049143708115, "grad_norm": 0.003255070370253107, "kl": 0.2919921875, "learning_rate": 1.0985282368938198e-08, "loss": 0.00029185606399551034, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6266, "train_speed(iter/s)": 0.026917 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 235.0, "completions/mean_length": 94.71875381469727, "completions/min_length": 45.25, "epoch": 9.339538346984364, "grad_norm": 0.00231265259148708, "kl": 0.251220703125, "learning_rate": 1.0936035814762712e-08, "loss": 0.00025068558170460165, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6267, "train_speed(iter/s)": 0.026915 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.5, "completions/mean_length": 93.23958396911621, "completions/min_length": 32.5, "epoch": 9.34102755026061, "grad_norm": 0.002270471752958081, "kl": 0.260009765625, "learning_rate": 1.0886898673267919e-08, "loss": 0.00026021781377494335, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6268, "train_speed(iter/s)": 0.026916 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 186.5, "completions/mean_length": 84.04166984558105, "completions/min_length": 33.0, "epoch": 9.342516753536858, "grad_norm": 2.7229232073934417, "kl": 0.31884765625, "learning_rate": 1.0837870955446637e-08, "loss": -0.0016767706256359816, "memory(GiB)": 112.53, "reward": 1.7916666865348816, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.791666679084301, "rewards/CineAccuracyORM/std": 0.2155592292547226, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6269, "train_speed(iter/s)": 0.026916 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 185.75, "completions/mean_length": 89.08333587646484, "completions/min_length": 42.5, "epoch": 9.344005956813104, "grad_norm": 0.0052976069062531965, "kl": 0.28369140625, "learning_rate": 1.078895267226737e-08, "loss": 0.00028365378966555, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6270, "train_speed(iter/s)": 0.026917 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 277.75, "completions/mean_length": 101.87500190734863, "completions/min_length": 44.25, "epoch": 9.345495160089353, "grad_norm": 0.5567512878840026, "kl": 0.33203125, "learning_rate": 1.074014383467392e-08, "loss": 0.013776811771094799, "memory(GiB)": 112.53, "reward": 1.7395834028720856, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.739583358168602, "rewards/CineAccuracyORM/std": 0.4121886007487774, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6271, "train_speed(iter/s)": 0.026918 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 325.5, "completions/mean_length": 102.91666793823242, "completions/min_length": 44.25, "epoch": 9.3469843633656, "grad_norm": 1.430887760011623, "kl": 0.26708984375, "learning_rate": 1.0691444453585773e-08, "loss": 0.002137422561645508, "memory(GiB)": 112.53, "reward": 1.9270833432674408, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.11058146506547928, "rewards/Format/mean": 0.9895833432674408, "rewards/Format/std": 0.05103103443980217, "step": 6272, "train_speed(iter/s)": 0.026917 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 214.75, "completions/mean_length": 93.61458587646484, "completions/min_length": 45.5, "epoch": 9.348473566641847, "grad_norm": 0.002903212356503786, "kl": 0.30029296875, "learning_rate": 1.0642854539897994e-08, "loss": 0.000300128071103245, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6273, "train_speed(iter/s)": 0.026917 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 206.5, "completions/mean_length": 94.83333587646484, "completions/min_length": 40.5, "epoch": 9.349962769918093, "grad_norm": 0.015280678076642032, "kl": 0.26611328125, "learning_rate": 1.0594374104480886e-08, "loss": 0.0002662025799509138, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6274, "train_speed(iter/s)": 0.026917 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.25, "completions/mean_length": 88.27083587646484, "completions/min_length": 36.0, "epoch": 9.35145197319434, "grad_norm": 2.9250107351913153, "kl": 0.28076171875, "learning_rate": 1.0546003158180494e-08, "loss": 0.00325904693454504, "memory(GiB)": 112.53, "reward": 1.96875, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.96875, "rewards/CineAccuracyORM/std": 0.08445799350738525, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6275, "train_speed(iter/s)": 0.026917 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 234.5, "completions/mean_length": 101.03125381469727, "completions/min_length": 40.5, "epoch": 9.352941176470589, "grad_norm": 0.002129533276471382, "kl": 0.25927734375, "learning_rate": 1.0497741711818275e-08, "loss": 0.00025951710995286703, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6276, "train_speed(iter/s)": 0.026918 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.75, "completions/mean_length": 95.10416984558105, "completions/min_length": 35.5, "epoch": 9.354430379746836, "grad_norm": 0.0029687064344663524, "kl": 0.2685546875, "learning_rate": 1.0449589776191314e-08, "loss": 0.0002686164225451648, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6277, "train_speed(iter/s)": 0.026916 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 201.75, "completions/mean_length": 86.51041984558105, "completions/min_length": 41.5, "epoch": 9.355919583023082, "grad_norm": 0.003779298189974686, "kl": 0.27490234375, "learning_rate": 1.0401547362071938e-08, "loss": 0.0002741638454608619, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6278, "train_speed(iter/s)": 0.026915 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.0, "completions/mean_length": 84.09375381469727, "completions/min_length": 34.75, "epoch": 9.35740878629933, "grad_norm": 0.002461190936132916, "kl": 0.2958984375, "learning_rate": 1.0353614480208217e-08, "loss": 0.00029623048612847924, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6279, "train_speed(iter/s)": 0.026917 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.0, "completions/mean_length": 91.65625190734863, "completions/min_length": 38.5, "epoch": 9.358897989575578, "grad_norm": 1.448021429158193, "kl": 0.2744140625, "learning_rate": 1.0305791141323628e-08, "loss": 0.030904462561011314, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.7500000298023224, "rewards/CineAccuracyORM/std": 0.40787915512919426, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6280, "train_speed(iter/s)": 0.026917 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 252.75, "completions/mean_length": 88.87500190734863, "completions/min_length": 47.0, "epoch": 9.360387192851825, "grad_norm": 0.0025326180625819313, "kl": 0.28564453125, "learning_rate": 1.0258077356117056e-08, "loss": 0.00028538756305351853, "memory(GiB)": 112.53, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6281, "train_speed(iter/s)": 0.026919 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 236.0, "completions/mean_length": 98.19791793823242, "completions/min_length": 33.5, "epoch": 9.361876396128071, "grad_norm": 0.6592920830613281, "kl": 0.27099609375, "learning_rate": 1.0210473135263075e-08, "loss": -0.00967323873192072, "memory(GiB)": 112.53, "reward": 1.7604166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6282, "train_speed(iter/s)": 0.026918 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 243.75, "completions/mean_length": 91.87500190734863, "completions/min_length": 38.0, "epoch": 9.363365599404318, "grad_norm": 0.002626746534849703, "kl": 0.27978515625, "learning_rate": 1.0162978489411555e-08, "loss": 0.0002797498891595751, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6283, "train_speed(iter/s)": 0.026917 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 260.5, "completions/mean_length": 103.59375190734863, "completions/min_length": 40.25, "epoch": 9.364854802680567, "grad_norm": 0.002216469944559869, "kl": 0.2646484375, "learning_rate": 1.011559342918794e-08, "loss": 0.0002646185166668147, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6284, "train_speed(iter/s)": 0.026915 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.75, "completions/mean_length": 99.52083587646484, "completions/min_length": 42.75, "epoch": 9.366344005956813, "grad_norm": 0.0025803955391259644, "kl": 0.250732421875, "learning_rate": 1.0068317965193196e-08, "loss": 0.00025027699302881956, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6285, "train_speed(iter/s)": 0.026916 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 244.25, "completions/mean_length": 100.57291984558105, "completions/min_length": 32.75, "epoch": 9.36783320923306, "grad_norm": 0.002399583378992552, "kl": 0.268310546875, "learning_rate": 1.0021152108003639e-08, "loss": 0.0002687227970454842, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6286, "train_speed(iter/s)": 0.026915 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.75, "completions/mean_length": 88.30208778381348, "completions/min_length": 40.25, "epoch": 9.369322412509307, "grad_norm": 0.003166508656057508, "kl": 0.2744140625, "learning_rate": 9.974095868171162e-09, "loss": 0.00027458113618195057, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6287, "train_speed(iter/s)": 0.026917 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.25, "completions/mean_length": 99.25000190734863, "completions/min_length": 36.75, "epoch": 9.370811615785554, "grad_norm": 0.002500200610498131, "kl": 0.268798828125, "learning_rate": 9.927149256223233e-09, "loss": 0.0002683709026314318, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6288, "train_speed(iter/s)": 0.026917 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 240.25, "completions/mean_length": 99.75000381469727, "completions/min_length": 47.0, "epoch": 9.372300819061802, "grad_norm": 0.0027259785219494606, "kl": 0.272216796875, "learning_rate": 9.880312282662562e-09, "loss": 0.000271950731985271, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6289, "train_speed(iter/s)": 0.026916 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 191.5, "completions/mean_length": 85.44791793823242, "completions/min_length": 41.5, "epoch": 9.37379002233805, "grad_norm": 0.0022655576228407816, "kl": 0.2802734375, "learning_rate": 9.83358495796749e-09, "loss": 0.00028033574926666915, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6290, "train_speed(iter/s)": 0.026915 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.5, "completions/mean_length": 90.94791984558105, "completions/min_length": 37.5, "epoch": 9.375279225614296, "grad_norm": 0.7175420981642854, "kl": 0.29150390625, "learning_rate": 9.786967292591764e-09, "loss": -0.01245461031794548, "memory(GiB)": 112.53, "reward": 1.9270833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9270833432674408, "rewards/CineAccuracyORM/std": 0.11607640981674194, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6291, "train_speed(iter/s)": 0.026917 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 252.0, "completions/mean_length": 95.18750381469727, "completions/min_length": 41.5, "epoch": 9.376768428890543, "grad_norm": 0.0031387925364253994, "kl": 0.28125, "learning_rate": 9.740459296964599e-09, "loss": 0.00028118357295170426, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6292, "train_speed(iter/s)": 0.026918 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.0, "completions/mean_length": 93.47916984558105, "completions/min_length": 40.0, "epoch": 9.378257632166791, "grad_norm": 0.002613908516818417, "kl": 0.2705078125, "learning_rate": 9.694060981490782e-09, "loss": 0.00027038922416977584, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6293, "train_speed(iter/s)": 0.026918 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.75, "completions/mean_length": 95.83333587646484, "completions/min_length": 37.0, "epoch": 9.379746835443038, "grad_norm": 0.002365722233409764, "kl": 0.267333984375, "learning_rate": 9.6477723565504e-09, "loss": 0.00026730686658993363, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6294, "train_speed(iter/s)": 0.026919 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.25, "completions/mean_length": 95.55208396911621, "completions/min_length": 35.75, "epoch": 9.381236038719285, "grad_norm": 0.002535929627046629, "kl": 0.288330078125, "learning_rate": 9.60159343249911e-09, "loss": 0.0002879715175367892, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6295, "train_speed(iter/s)": 0.02692 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 252.75, "completions/mean_length": 99.26041984558105, "completions/min_length": 43.25, "epoch": 9.382725241995532, "grad_norm": 0.0024047992801216715, "kl": 0.2607421875, "learning_rate": 9.555524219667988e-09, "loss": 0.00026109637110494077, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6296, "train_speed(iter/s)": 0.02692 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.75, "completions/mean_length": 94.68750190734863, "completions/min_length": 41.75, "epoch": 9.38421444527178, "grad_norm": 0.0025412339779176817, "kl": 0.2666015625, "learning_rate": 9.509564728363506e-09, "loss": 0.00026722706388682127, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6297, "train_speed(iter/s)": 0.02692 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 202.25, "completions/mean_length": 91.22916984558105, "completions/min_length": 32.25, "epoch": 9.385703648548027, "grad_norm": 0.0025252541486040663, "kl": 0.27001953125, "learning_rate": 9.463714968867831e-09, "loss": 0.0002696606097742915, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6298, "train_speed(iter/s)": 0.026918 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.5, "completions/mean_length": 91.28125190734863, "completions/min_length": 44.25, "epoch": 9.387192851824274, "grad_norm": 0.0022831704462686606, "kl": 0.26806640625, "learning_rate": 9.4179749514382e-09, "loss": 0.00026791199343279004, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6299, "train_speed(iter/s)": 0.026919 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.0, "completions/mean_length": 96.43750190734863, "completions/min_length": 44.25, "epoch": 9.38868205510052, "grad_norm": 0.0026885614217484963, "kl": 0.24951171875, "learning_rate": 9.372344686307653e-09, "loss": 0.00024936150293797255, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6300, "train_speed(iter/s)": 0.026921 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 208.5, "completions/mean_length": 95.78125381469727, "completions/min_length": 37.25, "epoch": 9.390171258376768, "grad_norm": 0.002565300358848929, "kl": 0.2685546875, "learning_rate": 9.326824183684468e-09, "loss": 0.00026880690711550415, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6301, "train_speed(iter/s)": 0.026921 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.75, "completions/mean_length": 84.67708396911621, "completions/min_length": 43.5, "epoch": 9.391660461653016, "grad_norm": 0.002297692627210033, "kl": 0.3076171875, "learning_rate": 9.281413453752385e-09, "loss": 0.00030706002144142985, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6302, "train_speed(iter/s)": 0.026921 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.0, "completions/mean_length": 93.94791793823242, "completions/min_length": 32.5, "epoch": 9.393149664929263, "grad_norm": 0.0022322993228238138, "kl": 0.29248046875, "learning_rate": 9.236112506670778e-09, "loss": 0.0002916674711741507, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6303, "train_speed(iter/s)": 0.02692 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.25, "completions/mean_length": 86.87500190734863, "completions/min_length": 30.25, "epoch": 9.39463886820551, "grad_norm": 0.0022897627699800297, "kl": 0.287109375, "learning_rate": 9.190921352574044e-09, "loss": 0.0002865914721041918, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6304, "train_speed(iter/s)": 0.026921 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.75, "completions/mean_length": 99.16666793823242, "completions/min_length": 34.25, "epoch": 9.396128071481757, "grad_norm": 0.0022977509537142086, "kl": 0.28369140625, "learning_rate": 9.145840001572536e-09, "loss": 0.0002832737227436155, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6305, "train_speed(iter/s)": 0.026921 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 263.75, "completions/mean_length": 100.70833587646484, "completions/min_length": 43.75, "epoch": 9.397617274758005, "grad_norm": 0.0021554513686282755, "kl": 0.248779296875, "learning_rate": 9.100868463751688e-09, "loss": 0.0002485389122739434, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6306, "train_speed(iter/s)": 0.026921 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 220.75, "completions/mean_length": 98.65625381469727, "completions/min_length": 42.5, "epoch": 9.399106478034252, "grad_norm": 0.689496645215813, "kl": 0.352783203125, "learning_rate": 9.056006749172396e-09, "loss": -0.011859700083732605, "memory(GiB)": 112.53, "reward": 1.6562500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6562500074505806, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6307, "train_speed(iter/s)": 0.026922 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 249.25, "completions/mean_length": 105.22916793823242, "completions/min_length": 40.0, "epoch": 9.400595681310499, "grad_norm": 0.004147806317207334, "kl": 0.2646484375, "learning_rate": 9.011254867871244e-09, "loss": 0.0002642461040522903, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6308, "train_speed(iter/s)": 0.026921 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 276.5, "completions/mean_length": 97.52083587646484, "completions/min_length": 36.5, "epoch": 9.402084884586746, "grad_norm": 0.002327283357130879, "kl": 0.265625, "learning_rate": 8.966612829859888e-09, "loss": 0.00026525260182097554, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6309, "train_speed(iter/s)": 0.02692 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.0, "completions/mean_length": 99.10416793823242, "completions/min_length": 39.0, "epoch": 9.403574087862994, "grad_norm": 0.002445325047462575, "kl": 0.265625, "learning_rate": 8.922080645125619e-09, "loss": 0.00026523738051764667, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6310, "train_speed(iter/s)": 0.026921 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.0, "completions/mean_length": 95.78125190734863, "completions/min_length": 39.0, "epoch": 9.405063291139241, "grad_norm": 0.004893024053529529, "kl": 0.26904296875, "learning_rate": 8.877658323631187e-09, "loss": 0.0002692313864827156, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6311, "train_speed(iter/s)": 0.02692 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.0, "completions/mean_length": 94.45833778381348, "completions/min_length": 41.25, "epoch": 9.406552494415488, "grad_norm": 0.003917576282184996, "kl": 0.26806640625, "learning_rate": 8.833345875314591e-09, "loss": 0.00026800157502293587, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6312, "train_speed(iter/s)": 0.026922 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 233.0, "completions/mean_length": 88.73958587646484, "completions/min_length": 37.75, "epoch": 9.408041697691734, "grad_norm": 0.002743648843671719, "kl": 0.259521484375, "learning_rate": 8.789143310089398e-09, "loss": 0.00026001842343248427, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6313, "train_speed(iter/s)": 0.026923 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.0, "completions/mean_length": 98.61458587646484, "completions/min_length": 38.5, "epoch": 9.409530900967981, "grad_norm": 0.0024872416934964064, "kl": 0.262451171875, "learning_rate": 8.745050637844532e-09, "loss": 0.0002619482111185789, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6314, "train_speed(iter/s)": 0.026922 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 185.0, "completions/mean_length": 88.94791984558105, "completions/min_length": 43.25, "epoch": 9.41102010424423, "grad_norm": 1.4961810063856131, "kl": 0.27734375, "learning_rate": 8.701067868444323e-09, "loss": 0.003155488520860672, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.7500000102445483, "rewards/CineAccuracyORM/std": 0.10206206887960434, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6315, "train_speed(iter/s)": 0.026923 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 256.25, "completions/mean_length": 101.72916984558105, "completions/min_length": 33.25, "epoch": 9.412509307520477, "grad_norm": 0.0023080969229148196, "kl": 0.272705078125, "learning_rate": 8.65719501172857e-09, "loss": 0.0002728227118495852, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6316, "train_speed(iter/s)": 0.02692 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 271.5, "completions/mean_length": 103.30208587646484, "completions/min_length": 42.25, "epoch": 9.413998510796723, "grad_norm": 0.005195768365631737, "kl": 0.266357421875, "learning_rate": 8.613432077512473e-09, "loss": 0.0002664512721821666, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6317, "train_speed(iter/s)": 0.026921 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.0, "completions/mean_length": 87.38541984558105, "completions/min_length": 34.5, "epoch": 9.41548771407297, "grad_norm": 0.0033009379765457252, "kl": 0.30810546875, "learning_rate": 8.569779075586536e-09, "loss": 0.00030853901989758015, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6318, "train_speed(iter/s)": 0.026922 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 256.5, "completions/mean_length": 94.38541984558105, "completions/min_length": 40.25, "epoch": 9.416976917349219, "grad_norm": 0.0024276169088124034, "kl": 0.29052734375, "learning_rate": 8.526236015716725e-09, "loss": 0.0002903786371462047, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6319, "train_speed(iter/s)": 0.026922 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 154.5, "completions/mean_length": 80.63541984558105, "completions/min_length": 35.25, "epoch": 9.418466120625466, "grad_norm": 0.002477212888061066, "kl": 0.2861328125, "learning_rate": 8.482802907644526e-09, "loss": 0.00028604533872567117, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6320, "train_speed(iter/s)": 0.026924 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.75, "completions/mean_length": 93.90625190734863, "completions/min_length": 44.25, "epoch": 9.419955323901712, "grad_norm": 1.4525342748105707, "kl": 0.263916015625, "learning_rate": 8.43947976108672e-09, "loss": -0.008513316512107849, "memory(GiB)": 112.53, "reward": 1.760416716337204, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166865348816, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6321, "train_speed(iter/s)": 0.026925 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 344.5, "completions/mean_length": 109.01041984558105, "completions/min_length": 39.0, "epoch": 9.42144452717796, "grad_norm": 0.0027167420634826565, "kl": 0.2490234375, "learning_rate": 8.396266585735446e-09, "loss": 0.0002490318438503891, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6322, "train_speed(iter/s)": 0.026924 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 184.0, "completions/mean_length": 86.17708587646484, "completions/min_length": 32.75, "epoch": 9.422933730454208, "grad_norm": 0.014788804881704627, "kl": 0.28466796875, "learning_rate": 8.3531633912583e-09, "loss": 0.0002847779542207718, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6323, "train_speed(iter/s)": 0.026925 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 261.25, "completions/mean_length": 103.10416793823242, "completions/min_length": 37.0, "epoch": 9.424422933730455, "grad_norm": 0.0027091063181738386, "kl": 0.252685546875, "learning_rate": 8.310170187298294e-09, "loss": 0.00025207584258168936, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6324, "train_speed(iter/s)": 0.026925 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 196.5, "completions/mean_length": 96.72916984558105, "completions/min_length": 41.5, "epoch": 9.425912137006701, "grad_norm": 0.0024135077430264486, "kl": 0.26416015625, "learning_rate": 8.267286983473731e-09, "loss": 0.0002641637693159282, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6325, "train_speed(iter/s)": 0.026926 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 234.75, "completions/mean_length": 90.22916793823242, "completions/min_length": 40.25, "epoch": 9.427401340282948, "grad_norm": 0.0022859574221765133, "kl": 0.27880859375, "learning_rate": 8.224513789378496e-09, "loss": 0.0002789167338050902, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6326, "train_speed(iter/s)": 0.026927 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.0, "completions/mean_length": 91.55208587646484, "completions/min_length": 45.75, "epoch": 9.428890543559195, "grad_norm": 0.0024337194213180573, "kl": 0.2666015625, "learning_rate": 8.181850614581709e-09, "loss": 0.000266572751570493, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6327, "train_speed(iter/s)": 0.026929 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 240.0, "completions/mean_length": 94.38541793823242, "completions/min_length": 31.5, "epoch": 9.430379746835444, "grad_norm": 0.002311442732915653, "kl": 0.250244140625, "learning_rate": 8.139297468627848e-09, "loss": 0.00025018578162416816, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6328, "train_speed(iter/s)": 0.026929 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 236.25, "completions/mean_length": 91.98958778381348, "completions/min_length": 43.25, "epoch": 9.43186895011169, "grad_norm": 0.0021666052565987664, "kl": 0.287109375, "learning_rate": 8.096854361036909e-09, "loss": 0.00028688443126156926, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6329, "train_speed(iter/s)": 0.026929 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 243.5, "completions/mean_length": 99.52083778381348, "completions/min_length": 38.25, "epoch": 9.433358153387937, "grad_norm": 0.0023414223497364474, "kl": 0.26416015625, "learning_rate": 8.05452130130413e-09, "loss": 0.00026382989017292857, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6330, "train_speed(iter/s)": 0.026928 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 252.75, "completions/mean_length": 94.88541984558105, "completions/min_length": 39.0, "epoch": 9.434847356664184, "grad_norm": 0.002130375819172489, "kl": 0.2900390625, "learning_rate": 8.012298298900321e-09, "loss": 0.00029073102632537484, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6331, "train_speed(iter/s)": 0.026928 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 246.25, "completions/mean_length": 104.60416984558105, "completions/min_length": 43.25, "epoch": 9.436336559940433, "grad_norm": 0.0022148404215275846, "kl": 0.239990234375, "learning_rate": 7.970185363271431e-09, "loss": 0.00023987970780581236, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6332, "train_speed(iter/s)": 0.026928 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 257.75, "completions/mean_length": 95.02083587646484, "completions/min_length": 39.5, "epoch": 9.43782576321668, "grad_norm": 0.0024548853579882263, "kl": 0.25390625, "learning_rate": 7.928182503838977e-09, "loss": 0.0002530631609261036, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6333, "train_speed(iter/s)": 0.026928 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.75, "completions/mean_length": 99.72916793823242, "completions/min_length": 39.0, "epoch": 9.439314966492926, "grad_norm": 0.0027345682469892382, "kl": 0.270751953125, "learning_rate": 7.886289729999773e-09, "loss": 0.00027072365628555417, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6334, "train_speed(iter/s)": 0.026927 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.5, "completions/mean_length": 86.67708587646484, "completions/min_length": 41.75, "epoch": 9.440804169769173, "grad_norm": 1.144090867027968, "kl": 0.29052734375, "learning_rate": 7.844507051125938e-09, "loss": -0.0065344953909516335, "memory(GiB)": 112.53, "reward": 1.8437500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6335, "train_speed(iter/s)": 0.026928 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 231.0, "completions/mean_length": 98.58333587646484, "completions/min_length": 35.25, "epoch": 9.442293373045421, "grad_norm": 0.0021261426575112556, "kl": 0.279541015625, "learning_rate": 7.802834476565157e-09, "loss": 0.00027954281540587544, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6336, "train_speed(iter/s)": 0.026926 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 199.75, "completions/mean_length": 90.11458587646484, "completions/min_length": 37.25, "epoch": 9.443782576321668, "grad_norm": 0.0023699772182560323, "kl": 0.28173828125, "learning_rate": 7.761272015640197e-09, "loss": 0.00028170598670840263, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6337, "train_speed(iter/s)": 0.026924 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 243.5, "completions/mean_length": 103.64583778381348, "completions/min_length": 42.75, "epoch": 9.445271779597915, "grad_norm": 0.028737950402463467, "kl": 0.259521484375, "learning_rate": 7.719819677649508e-09, "loss": 0.00025930299307219684, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6338, "train_speed(iter/s)": 0.026924 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.0, "completions/mean_length": 90.21875381469727, "completions/min_length": 43.5, "epoch": 9.446760982874162, "grad_norm": 0.0024784959809356463, "kl": 0.28369140625, "learning_rate": 7.678477471866674e-09, "loss": 0.00028436645516194403, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6339, "train_speed(iter/s)": 0.026923 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 276.75, "completions/mean_length": 103.47916984558105, "completions/min_length": 43.75, "epoch": 9.448250186150409, "grad_norm": 0.002193894856247881, "kl": 0.2607421875, "learning_rate": 7.637245407540682e-09, "loss": 0.00026088813319802284, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6340, "train_speed(iter/s)": 0.026923 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.0, "completions/mean_length": 90.80208396911621, "completions/min_length": 43.5, "epoch": 9.449739389426657, "grad_norm": 0.002309798464856194, "kl": 0.27197265625, "learning_rate": 7.59612349389599e-09, "loss": 0.00027237163158133626, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6341, "train_speed(iter/s)": 0.026925 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.0, "completions/mean_length": 100.83333587646484, "completions/min_length": 44.25, "epoch": 9.451228592702904, "grad_norm": 0.0023069151503581037, "kl": 0.259765625, "learning_rate": 7.555111740132235e-09, "loss": 0.0002594247634988278, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6342, "train_speed(iter/s)": 0.026924 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 336.0, "completions/mean_length": 89.54166889190674, "completions/min_length": 31.75, "epoch": 9.45271779597915, "grad_norm": 0.33813224913619533, "kl": 0.30810546875, "learning_rate": 7.51421015542464e-09, "loss": 0.06087210029363632, "memory(GiB)": 112.53, "reward": 1.8958333730697632, "reward_std": 0.0589255653321743, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 0.9895833432674408, "rewards/Format/std": 0.05103103443980217, "step": 6343, "train_speed(iter/s)": 0.026922 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.5, "completions/mean_length": 91.18750190734863, "completions/min_length": 36.5, "epoch": 9.454206999255398, "grad_norm": 0.0027526351069839805, "kl": 0.28173828125, "learning_rate": 7.473418748923544e-09, "loss": 0.00028139917412772775, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6344, "train_speed(iter/s)": 0.026922 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 279.0, "completions/mean_length": 97.97916793823242, "completions/min_length": 40.0, "epoch": 9.455696202531646, "grad_norm": 0.0026970321165926425, "kl": 0.2763671875, "learning_rate": 7.432737529754707e-09, "loss": 0.0002762438089121133, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6345, "train_speed(iter/s)": 0.02692 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.5, "completions/mean_length": 98.42708587646484, "completions/min_length": 37.5, "epoch": 9.457185405807893, "grad_norm": 0.0023976288926211217, "kl": 0.25048828125, "learning_rate": 7.392166507019404e-09, "loss": 0.0002509189653210342, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6346, "train_speed(iter/s)": 0.026921 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 204.25, "completions/mean_length": 89.12500190734863, "completions/min_length": 46.75, "epoch": 9.45867460908414, "grad_norm": 0.0028449454164281643, "kl": 0.28857421875, "learning_rate": 7.351705689794041e-09, "loss": 0.0002883173292502761, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6347, "train_speed(iter/s)": 0.02692 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 268.25, "completions/mean_length": 100.57291984558105, "completions/min_length": 46.5, "epoch": 9.460163812360387, "grad_norm": 0.10698313071923019, "kl": 0.3828125, "learning_rate": 7.311355087130488e-09, "loss": 0.00038272570236586034, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6348, "train_speed(iter/s)": 0.02692 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.0, "completions/mean_length": 92.78125190734863, "completions/min_length": 40.25, "epoch": 9.461653015636635, "grad_norm": 0.002312437258291875, "kl": 0.2685546875, "learning_rate": 7.271114708055859e-09, "loss": 0.00026836723554879427, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6349, "train_speed(iter/s)": 0.026918 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 194.75, "completions/mean_length": 88.70833587646484, "completions/min_length": 40.0, "epoch": 9.463142218912882, "grad_norm": 0.002454729285864942, "kl": 0.29052734375, "learning_rate": 7.230984561572729e-09, "loss": 0.0002905454603023827, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6350, "train_speed(iter/s)": 0.026917 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.75, "completions/mean_length": 104.52083587646484, "completions/min_length": 40.25, "epoch": 9.464631422189129, "grad_norm": 0.0025185810669273224, "kl": 0.25537109375, "learning_rate": 7.1909646566589156e-09, "loss": 0.0002552857040427625, "memory(GiB)": 112.53, "reward": 1.4166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.4166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6351, "train_speed(iter/s)": 0.026916 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 191.0, "completions/mean_length": 91.94791793823242, "completions/min_length": 38.75, "epoch": 9.466120625465376, "grad_norm": 0.004547920969733781, "kl": 0.273193359375, "learning_rate": 7.151055002267648e-09, "loss": 0.0002737129689194262, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6352, "train_speed(iter/s)": 0.026916 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 185.25, "completions/mean_length": 91.96875190734863, "completions/min_length": 37.5, "epoch": 9.467609828741622, "grad_norm": 0.00268654660659832, "kl": 0.24560546875, "learning_rate": 7.11125560732756e-09, "loss": 0.00024560524616390467, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6353, "train_speed(iter/s)": 0.026918 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.5, "completions/mean_length": 96.89583778381348, "completions/min_length": 38.5, "epoch": 9.469099032017871, "grad_norm": 0.0029444068996736764, "kl": 0.283203125, "learning_rate": 7.071566480742253e-09, "loss": 0.0002826981362886727, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6354, "train_speed(iter/s)": 0.026916 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.0, "completions/mean_length": 96.13541984558105, "completions/min_length": 41.5, "epoch": 9.470588235294118, "grad_norm": 0.002220457527553317, "kl": 0.27197265625, "learning_rate": 7.031987631391178e-09, "loss": 0.00027199354371987283, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6355, "train_speed(iter/s)": 0.026916 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.5, "completions/mean_length": 91.64583587646484, "completions/min_length": 34.0, "epoch": 9.472077438570365, "grad_norm": 0.002555485451378402, "kl": 0.267822265625, "learning_rate": 6.9925190681286994e-09, "loss": 0.0002674029383342713, "memory(GiB)": 112.53, "reward": 1.75, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.75, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6356, "train_speed(iter/s)": 0.026916 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 264.0, "completions/mean_length": 98.55208396911621, "completions/min_length": 46.0, "epoch": 9.473566641846611, "grad_norm": 0.0022233211701690463, "kl": 0.271240234375, "learning_rate": 6.953160799784697e-09, "loss": 0.0002710768603719771, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6357, "train_speed(iter/s)": 0.026915 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 225.0, "completions/mean_length": 94.31250190734863, "completions/min_length": 36.25, "epoch": 9.47505584512286, "grad_norm": 0.00238592903740708, "kl": 0.266357421875, "learning_rate": 6.91391283516446e-09, "loss": 0.00026686699129641056, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6358, "train_speed(iter/s)": 0.026915 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 249.0, "completions/mean_length": 111.72916984558105, "completions/min_length": 37.25, "epoch": 9.476545048399107, "grad_norm": 1.1990455207102961, "kl": 0.244140625, "learning_rate": 6.874775183048298e-09, "loss": -0.002690692199394107, "memory(GiB)": 112.53, "reward": 1.8541666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.12590253353118896, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6359, "train_speed(iter/s)": 0.026914 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.5, "completions/mean_length": 99.15625190734863, "completions/min_length": 44.25, "epoch": 9.478034251675354, "grad_norm": 0.0026878084798018422, "kl": 0.283203125, "learning_rate": 6.835747852192153e-09, "loss": 0.00028312712674960494, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6360, "train_speed(iter/s)": 0.026913 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.75, "completions/mean_length": 90.34375381469727, "completions/min_length": 38.0, "epoch": 9.4795234549516, "grad_norm": 0.002352352815843159, "kl": 0.28515625, "learning_rate": 6.796830851327151e-09, "loss": 0.0002847602590918541, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6361, "train_speed(iter/s)": 0.026915 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.25, "completions/mean_length": 84.04166793823242, "completions/min_length": 35.5, "epoch": 9.481012658227849, "grad_norm": 0.002852430097036573, "kl": 0.2724609375, "learning_rate": 6.7580241891597165e-09, "loss": 0.00027279864298179746, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6362, "train_speed(iter/s)": 0.026916 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.75, "completions/mean_length": 86.53125190734863, "completions/min_length": 39.25, "epoch": 9.482501861504096, "grad_norm": 0.0025045419283808673, "kl": 0.270751953125, "learning_rate": 6.719327874371628e-09, "loss": 0.0002708213869482279, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6363, "train_speed(iter/s)": 0.026917 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 201.25, "completions/mean_length": 92.75000190734863, "completions/min_length": 40.25, "epoch": 9.483991064780342, "grad_norm": 1.7806378442629038, "kl": 0.2607421875, "learning_rate": 6.680741915620014e-09, "loss": -0.01598486863076687, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8020833432674408, "rewards/CineAccuracyORM/std": 0.3252297043800354, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6364, "train_speed(iter/s)": 0.026917 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 241.5, "completions/mean_length": 98.48958587646484, "completions/min_length": 30.5, "epoch": 9.48548026805659, "grad_norm": 0.0023101178017550087, "kl": 0.27197265625, "learning_rate": 6.642266321537248e-09, "loss": 0.00027196406153962016, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6365, "train_speed(iter/s)": 0.026917 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.0, "completions/mean_length": 95.98958587646484, "completions/min_length": 46.5, "epoch": 9.486969471332836, "grad_norm": 0.002986770226127595, "kl": 0.2646484375, "learning_rate": 6.603901100730946e-09, "loss": 0.000265016860794276, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6366, "train_speed(iter/s)": 0.026917 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.5, "completions/mean_length": 95.00000381469727, "completions/min_length": 37.75, "epoch": 9.488458674609085, "grad_norm": 0.0022259867392665374, "kl": 0.2421875, "learning_rate": 6.565646261784241e-09, "loss": 0.00024282959930133075, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6367, "train_speed(iter/s)": 0.026919 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.75, "completions/mean_length": 87.51041984558105, "completions/min_length": 43.25, "epoch": 9.489947877885331, "grad_norm": 0.0023735445959039325, "kl": 0.279296875, "learning_rate": 6.5275018132553425e-09, "loss": 0.00027894380036741495, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6368, "train_speed(iter/s)": 0.02692 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.25, "completions/mean_length": 96.84375, "completions/min_length": 37.75, "epoch": 9.491437081161578, "grad_norm": 0.011852221978488047, "kl": 0.2734375, "learning_rate": 6.48946776367798e-09, "loss": 0.000273493817076087, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6369, "train_speed(iter/s)": 0.026918 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 206.0, "completions/mean_length": 95.34375190734863, "completions/min_length": 41.25, "epoch": 9.492926284437825, "grad_norm": 0.007629230876901237, "kl": 0.2802734375, "learning_rate": 6.451544121561014e-09, "loss": 0.000280447187833488, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6370, "train_speed(iter/s)": 0.026918 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.0, "completions/mean_length": 91.70833587646484, "completions/min_length": 37.75, "epoch": 9.494415487714074, "grad_norm": 0.0024695848223578675, "kl": 0.267333984375, "learning_rate": 6.4137308953887136e-09, "loss": 0.00026707936194725335, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6371, "train_speed(iter/s)": 0.026919 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 250.75, "completions/mean_length": 99.04166984558105, "completions/min_length": 38.0, "epoch": 9.49590469099032, "grad_norm": 0.0030221430655036634, "kl": 0.2734375, "learning_rate": 6.3760280936205335e-09, "loss": 0.00027362885884940624, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6372, "train_speed(iter/s)": 0.026918 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.0, "completions/mean_length": 93.89583587646484, "completions/min_length": 43.0, "epoch": 9.497393894266567, "grad_norm": 0.0025568154372018572, "kl": 0.267578125, "learning_rate": 6.338435724691282e-09, "loss": 0.0002676686563063413, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6373, "train_speed(iter/s)": 0.026917 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 236.0, "completions/mean_length": 104.25000190734863, "completions/min_length": 44.75, "epoch": 9.498883097542814, "grad_norm": 0.002629113243839928, "kl": 0.259033203125, "learning_rate": 6.300953797011177e-09, "loss": 0.00025900331092998385, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6374, "train_speed(iter/s)": 0.026918 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.25, "completions/mean_length": 84.41667175292969, "completions/min_length": 37.0, "epoch": 9.500372300819063, "grad_norm": 0.002451499286742187, "kl": 0.279541015625, "learning_rate": 6.263582318965455e-09, "loss": 0.00027909729396924376, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6375, "train_speed(iter/s)": 0.026917 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.25, "completions/mean_length": 93.50000381469727, "completions/min_length": 37.5, "epoch": 9.50186150409531, "grad_norm": 0.002172983946836799, "kl": 0.274658203125, "learning_rate": 6.226321298914983e-09, "loss": 0.00027461230638436973, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6376, "train_speed(iter/s)": 0.026917 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 244.25, "completions/mean_length": 97.04166984558105, "completions/min_length": 36.0, "epoch": 9.503350707371556, "grad_norm": 0.002391710205035923, "kl": 0.290283203125, "learning_rate": 6.189170745195649e-09, "loss": 0.0002901546540670097, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6377, "train_speed(iter/s)": 0.026916 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.5, "completions/mean_length": 91.95833587646484, "completions/min_length": 32.75, "epoch": 9.504839910647803, "grad_norm": 0.0021652063725812127, "kl": 0.27734375, "learning_rate": 6.1521306661186934e-09, "loss": 0.00027705481625162065, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6378, "train_speed(iter/s)": 0.026916 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 244.75, "completions/mean_length": 96.13541793823242, "completions/min_length": 43.0, "epoch": 9.50632911392405, "grad_norm": 0.002946595058260815, "kl": 0.264892578125, "learning_rate": 6.1152010699707655e-09, "loss": 0.0002649838279467076, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6379, "train_speed(iter/s)": 0.026914 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.25, "completions/mean_length": 98.58333587646484, "completions/min_length": 27.75, "epoch": 9.507818317200298, "grad_norm": 0.0022399373146369576, "kl": 0.260009765625, "learning_rate": 6.078381965013646e-09, "loss": 0.0002604850451461971, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6380, "train_speed(iter/s)": 0.026916 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.25, "completions/mean_length": 100.94791793823242, "completions/min_length": 42.0, "epoch": 9.509307520476545, "grad_norm": 0.003669715866049637, "kl": 0.2509765625, "learning_rate": 6.041673359484411e-09, "loss": 0.0002510721969883889, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6381, "train_speed(iter/s)": 0.026913 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 241.0, "completions/mean_length": 107.82291984558105, "completions/min_length": 49.5, "epoch": 9.510796723752792, "grad_norm": 0.011216124003631991, "kl": 0.250244140625, "learning_rate": 6.005075261595494e-09, "loss": 0.0002502165734767914, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6382, "train_speed(iter/s)": 0.026912 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 229.25, "completions/mean_length": 104.41666984558105, "completions/min_length": 33.75, "epoch": 9.512285927029039, "grad_norm": 1.7733436772040674, "kl": 0.246337890625, "learning_rate": 5.96858767953462e-09, "loss": -0.01038464903831482, "memory(GiB)": 112.53, "reward": 1.8437500596046448, "reward_std": 0.08170493692159653, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.2946811020374298, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6383, "train_speed(iter/s)": 0.026911 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.75, "completions/mean_length": 95.27083587646484, "completions/min_length": 45.0, "epoch": 9.513775130305287, "grad_norm": 0.0023944325120221974, "kl": 0.241455078125, "learning_rate": 5.93221062146465e-09, "loss": 0.0002413386246189475, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6384, "train_speed(iter/s)": 0.026911 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.5, "completions/mean_length": 92.72916793823242, "completions/min_length": 40.25, "epoch": 9.515264333581534, "grad_norm": 0.0030716248782104256, "kl": 0.287109375, "learning_rate": 5.895944095523853e-09, "loss": 0.0002869676100090146, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6385, "train_speed(iter/s)": 0.02691 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 204.0, "completions/mean_length": 94.97916984558105, "completions/min_length": 37.0, "epoch": 9.516753536857781, "grad_norm": 1.9386293832759147, "kl": 0.26611328125, "learning_rate": 5.859788109825792e-09, "loss": -0.013187865726649761, "memory(GiB)": 112.53, "reward": 1.854166716337204, "reward_std": 0.08311938121914864, "rewards/CineAccuracyORM/mean": 0.8541666865348816, "rewards/CineAccuracyORM/std": 0.28614169359207153, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6386, "train_speed(iter/s)": 0.026909 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.75, "completions/mean_length": 90.22916984558105, "completions/min_length": 42.25, "epoch": 9.518242740134028, "grad_norm": 0.0026082355240090336, "kl": 0.2802734375, "learning_rate": 5.823742672459053e-09, "loss": 0.00028006135835312307, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6387, "train_speed(iter/s)": 0.026908 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 322.0, "completions/mean_length": 95.82291793823242, "completions/min_length": 44.25, "epoch": 9.519731943410276, "grad_norm": 0.0024230136242629745, "kl": 0.28564453125, "learning_rate": 5.787807791487798e-09, "loss": 0.00028513988945633173, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6388, "train_speed(iter/s)": 0.026908 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 220.25, "completions/mean_length": 102.83333396911621, "completions/min_length": 43.0, "epoch": 9.521221146686523, "grad_norm": 0.002201826491504484, "kl": 0.233154296875, "learning_rate": 5.751983474951316e-09, "loss": 0.00023312767734751105, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6389, "train_speed(iter/s)": 0.026908 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 249.5, "completions/mean_length": 98.86458587646484, "completions/min_length": 47.25, "epoch": 9.52271034996277, "grad_norm": 0.011840451380514665, "kl": 0.28271484375, "learning_rate": 5.716269730864143e-09, "loss": 0.0002824908006004989, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6390, "train_speed(iter/s)": 0.02691 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 350.5, "completions/mean_length": 112.58333587646484, "completions/min_length": 49.0, "epoch": 9.524199553239017, "grad_norm": 1.3407053905531503, "kl": 0.233642578125, "learning_rate": 5.680666567216163e-09, "loss": 0.009192809462547302, "memory(GiB)": 112.53, "reward": 1.8958333432674408, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.12590253353118896, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6391, "train_speed(iter/s)": 0.026909 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 264.5, "completions/mean_length": 98.62500190734863, "completions/min_length": 32.0, "epoch": 9.525688756515265, "grad_norm": 0.0038355724377115547, "kl": 0.26904296875, "learning_rate": 5.645173991972341e-09, "loss": 0.00026873755268752575, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6392, "train_speed(iter/s)": 0.026909 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.25, "completions/mean_length": 93.59375, "completions/min_length": 46.75, "epoch": 9.527177959791512, "grad_norm": 0.002369502471643672, "kl": 0.263671875, "learning_rate": 5.6097920130731555e-09, "loss": 0.00026448379503563046, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6393, "train_speed(iter/s)": 0.026909 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.75, "completions/mean_length": 90.97917175292969, "completions/min_length": 40.5, "epoch": 9.528667163067759, "grad_norm": 0.0023978368692892872, "kl": 0.29736328125, "learning_rate": 5.574520638434166e-09, "loss": 0.0002973760711029172, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6394, "train_speed(iter/s)": 0.02691 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.0, "completions/mean_length": 90.62500381469727, "completions/min_length": 43.25, "epoch": 9.530156366344006, "grad_norm": 0.002488420095214771, "kl": 0.255859375, "learning_rate": 5.53935987594617e-09, "loss": 0.0002557835541665554, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6395, "train_speed(iter/s)": 0.026912 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.75, "completions/mean_length": 98.01041984558105, "completions/min_length": 40.25, "epoch": 9.531645569620252, "grad_norm": 0.0022590314973250977, "kl": 0.27978515625, "learning_rate": 5.5043097334754314e-09, "loss": 0.00027966086054220796, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6396, "train_speed(iter/s)": 0.026911 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 185.0, "completions/mean_length": 89.20833587646484, "completions/min_length": 39.5, "epoch": 9.533134772896501, "grad_norm": 0.003528977319272558, "kl": 0.2890625, "learning_rate": 5.469370218863179e-09, "loss": 0.0002887204464059323, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6397, "train_speed(iter/s)": 0.02691 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 253.25, "completions/mean_length": 102.63541984558105, "completions/min_length": 40.5, "epoch": 9.534623976172748, "grad_norm": 0.006106870594439346, "kl": 0.269775390625, "learning_rate": 5.434541339926047e-09, "loss": 0.00027040857821702957, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6398, "train_speed(iter/s)": 0.02691 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 188.5, "completions/mean_length": 84.51041984558105, "completions/min_length": 35.25, "epoch": 9.536113179448995, "grad_norm": 0.004396043104156266, "kl": 0.277099609375, "learning_rate": 5.399823104455969e-09, "loss": 0.00027647370006889105, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6399, "train_speed(iter/s)": 0.02691 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 186.75, "completions/mean_length": 98.14583587646484, "completions/min_length": 45.75, "epoch": 9.537602382725241, "grad_norm": 0.002372515138466117, "kl": 0.2744140625, "learning_rate": 5.365215520220012e-09, "loss": 0.0002750657731667161, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6400, "train_speed(iter/s)": 0.026912 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.75, "completions/mean_length": 92.84375190734863, "completions/min_length": 38.0, "epoch": 9.53909158600149, "grad_norm": 0.0024573483580980792, "kl": 0.29443359375, "learning_rate": 5.330718594960593e-09, "loss": 0.0002938584075309336, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6401, "train_speed(iter/s)": 0.026912 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 244.75, "completions/mean_length": 103.42708587646484, "completions/min_length": 39.25, "epoch": 9.540580789277737, "grad_norm": 0.0022970509232971724, "kl": 0.260498046875, "learning_rate": 5.296332336395149e-09, "loss": 0.00026011219597421587, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6402, "train_speed(iter/s)": 0.026911 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 235.75, "completions/mean_length": 93.69791984558105, "completions/min_length": 37.75, "epoch": 9.542069992553984, "grad_norm": 0.0023466280955113826, "kl": 0.267578125, "learning_rate": 5.262056752216748e-09, "loss": 0.00026780873304232955, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6403, "train_speed(iter/s)": 0.026911 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 220.25, "completions/mean_length": 97.13541984558105, "completions/min_length": 34.25, "epoch": 9.54355919583023, "grad_norm": 0.0023586650296702305, "kl": 0.2861328125, "learning_rate": 5.2278918500933134e-09, "loss": 0.0002857409417629242, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6404, "train_speed(iter/s)": 0.026909 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 253.5, "completions/mean_length": 105.76041793823242, "completions/min_length": 39.5, "epoch": 9.545048399106477, "grad_norm": 0.002340161129212451, "kl": 0.2626953125, "learning_rate": 5.1938376376682305e-09, "loss": 0.0002626060741022229, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6405, "train_speed(iter/s)": 0.026908 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 202.25, "completions/mean_length": 90.75000190734863, "completions/min_length": 34.25, "epoch": 9.546537602382726, "grad_norm": 0.0029834683890296036, "kl": 0.29052734375, "learning_rate": 5.159894122560016e-09, "loss": 0.0002900403051171452, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6406, "train_speed(iter/s)": 0.02691 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.25, "completions/mean_length": 96.30208587646484, "completions/min_length": 34.25, "epoch": 9.548026805658973, "grad_norm": 0.0025206811651366473, "kl": 0.255615234375, "learning_rate": 5.12606131236254e-09, "loss": 0.00025534347514621913, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6407, "train_speed(iter/s)": 0.02691 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 248.0, "completions/mean_length": 89.88541793823242, "completions/min_length": 41.75, "epoch": 9.54951600893522, "grad_norm": 0.0022393452077566034, "kl": 0.2802734375, "learning_rate": 5.092339214644747e-09, "loss": 0.0002803376701194793, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6408, "train_speed(iter/s)": 0.02691 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 225.25, "completions/mean_length": 102.89583587646484, "completions/min_length": 41.0, "epoch": 9.551005212211466, "grad_norm": 0.0021134846732736253, "kl": 0.2470703125, "learning_rate": 5.058727836950938e-09, "loss": 0.00024674786254763603, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6409, "train_speed(iter/s)": 0.02691 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 237.75, "completions/mean_length": 86.39583587646484, "completions/min_length": 35.75, "epoch": 9.552494415487715, "grad_norm": 0.002487677091092207, "kl": 0.27734375, "learning_rate": 5.025227186800652e-09, "loss": 0.0002769952407106757, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6410, "train_speed(iter/s)": 0.02691 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.25, "completions/mean_length": 97.96875, "completions/min_length": 39.75, "epoch": 9.553983618763962, "grad_norm": 0.0022284787650103106, "kl": 0.255615234375, "learning_rate": 4.991837271688504e-09, "loss": 0.0002555465616751462, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6411, "train_speed(iter/s)": 0.02691 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 266.25, "completions/mean_length": 100.32291793823242, "completions/min_length": 48.75, "epoch": 9.555472822040208, "grad_norm": 0.0028182090387735468, "kl": 0.267822265625, "learning_rate": 4.95855809908452e-09, "loss": 0.0002678694436326623, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6412, "train_speed(iter/s)": 0.02691 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 252.5, "completions/mean_length": 103.80208587646484, "completions/min_length": 44.5, "epoch": 9.556962025316455, "grad_norm": 0.0035525724645287205, "kl": 0.275390625, "learning_rate": 4.925389676433745e-09, "loss": 0.00027551973471418023, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6413, "train_speed(iter/s)": 0.02691 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 241.25, "completions/mean_length": 92.92708587646484, "completions/min_length": 42.0, "epoch": 9.558451228592704, "grad_norm": 0.0022886187031563735, "kl": 0.2646484375, "learning_rate": 4.892332011156741e-09, "loss": 0.0002647408691700548, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6414, "train_speed(iter/s)": 0.026909 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 231.0, "completions/mean_length": 98.71875, "completions/min_length": 45.0, "epoch": 9.55994043186895, "grad_norm": 0.0023945643395026518, "kl": 0.258056640625, "learning_rate": 4.859385110648984e-09, "loss": 0.0002583930327091366, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6415, "train_speed(iter/s)": 0.026907 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 253.5, "completions/mean_length": 94.76041984558105, "completions/min_length": 42.5, "epoch": 9.561429635145197, "grad_norm": 1.691134367804079, "kl": 0.261474609375, "learning_rate": 4.8265489822814084e-09, "loss": 0.0016125526744872332, "memory(GiB)": 112.53, "reward": 1.84375, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.84375, "rewards/CineAccuracyORM/std": 0.12363383919000626, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6416, "train_speed(iter/s)": 0.026908 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 171.75, "completions/mean_length": 82.14583587646484, "completions/min_length": 42.25, "epoch": 9.562918838421444, "grad_norm": 2.6785090652019408, "kl": 0.34423828125, "learning_rate": 4.793823633400029e-09, "loss": 0.005562224891036749, "memory(GiB)": 112.53, "reward": 1.6145833730697632, "reward_std": 0.07400632463395596, "rewards/CineAccuracyORM/mean": 0.6145833507180214, "rewards/CineAccuracyORM/std": 0.4194912277162075, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6417, "train_speed(iter/s)": 0.026907 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 238.25, "completions/mean_length": 100.33333778381348, "completions/min_length": 43.25, "epoch": 9.564408041697693, "grad_norm": 1.1154684562237789, "kl": 0.265869140625, "learning_rate": 4.761209071326045e-09, "loss": -0.012522144243121147, "memory(GiB)": 112.53, "reward": 1.7708333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.7708333432674408, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6418, "train_speed(iter/s)": 0.026906 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 296.5, "completions/mean_length": 103.26041984558105, "completions/min_length": 45.25, "epoch": 9.56589724497394, "grad_norm": 0.0023062308567125107, "kl": 0.253173828125, "learning_rate": 4.728705303356007e-09, "loss": 0.0002532229700591415, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6419, "train_speed(iter/s)": 0.026907 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 243.0, "completions/mean_length": 101.84375381469727, "completions/min_length": 36.75, "epoch": 9.567386448250186, "grad_norm": 0.0035807826132734006, "kl": 0.282470703125, "learning_rate": 4.696312336761599e-09, "loss": 0.00028255124925635755, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6420, "train_speed(iter/s)": 0.026905 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.25, "completions/mean_length": 98.20833587646484, "completions/min_length": 43.0, "epoch": 9.568875651526433, "grad_norm": 0.0027169795878387356, "kl": 0.27392578125, "learning_rate": 4.664030178789746e-09, "loss": 0.00027396256336942315, "memory(GiB)": 112.53, "reward": 1.416666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.416666679084301, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6421, "train_speed(iter/s)": 0.026905 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.0, "completions/mean_length": 96.66666793823242, "completions/min_length": 44.5, "epoch": 9.57036485480268, "grad_norm": 0.002673742530115352, "kl": 0.2783203125, "learning_rate": 4.631858836662561e-09, "loss": 0.00027850503101944923, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6422, "train_speed(iter/s)": 0.026907 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 235.5, "completions/mean_length": 96.79166984558105, "completions/min_length": 45.75, "epoch": 9.571854058078928, "grad_norm": 0.0024625049228190698, "kl": 0.244873046875, "learning_rate": 4.5997983175773416e-09, "loss": 0.00024469889467582107, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6423, "train_speed(iter/s)": 0.026907 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 257.0, "completions/mean_length": 98.26041793823242, "completions/min_length": 36.25, "epoch": 9.573343261355175, "grad_norm": 0.0023441000117506543, "kl": 0.283203125, "learning_rate": 4.567848628706683e-09, "loss": 0.00028347407351247966, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6424, "train_speed(iter/s)": 0.026904 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 184.0, "completions/mean_length": 90.14583587646484, "completions/min_length": 30.75, "epoch": 9.574832464631422, "grad_norm": 0.002553169203568437, "kl": 0.2509765625, "learning_rate": 4.536009777198202e-09, "loss": 0.00025084323715418577, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6425, "train_speed(iter/s)": 0.026906 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 257.75, "completions/mean_length": 94.05208587646484, "completions/min_length": 45.25, "epoch": 9.576321667907669, "grad_norm": 0.004314529384423609, "kl": 0.27294921875, "learning_rate": 4.5042817701749225e-09, "loss": 0.0002729535917751491, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6426, "train_speed(iter/s)": 0.026905 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 229.5, "completions/mean_length": 103.01041984558105, "completions/min_length": 47.25, "epoch": 9.577810871183917, "grad_norm": 0.00237901146092625, "kl": 0.258544921875, "learning_rate": 4.472664614735056e-09, "loss": 0.0002587359631434083, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6427, "train_speed(iter/s)": 0.026904 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 235.75, "completions/mean_length": 102.08333396911621, "completions/min_length": 42.25, "epoch": 9.579300074460164, "grad_norm": 0.7272158104417128, "kl": 0.259033203125, "learning_rate": 4.441158317951776e-09, "loss": -0.002015815582126379, "memory(GiB)": 112.53, "reward": 1.8437500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6428, "train_speed(iter/s)": 0.026905 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 257.5, "completions/mean_length": 102.05208587646484, "completions/min_length": 48.5, "epoch": 9.580789277736411, "grad_norm": 0.0021430436736423336, "kl": 0.253173828125, "learning_rate": 4.40976288687378e-09, "loss": 0.0002531512873247266, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6429, "train_speed(iter/s)": 0.026905 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.0, "completions/mean_length": 98.05208778381348, "completions/min_length": 39.5, "epoch": 9.582278481012658, "grad_norm": 0.0026737306834645025, "kl": 0.263671875, "learning_rate": 4.378478328524726e-09, "loss": 0.00026375960442237556, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6430, "train_speed(iter/s)": 0.026905 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 250.0, "completions/mean_length": 88.48958587646484, "completions/min_length": 29.5, "epoch": 9.583767684288905, "grad_norm": 0.0021526540749292997, "kl": 0.28564453125, "learning_rate": 4.347304649903572e-09, "loss": 0.00028613043832592666, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6431, "train_speed(iter/s)": 0.026904 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.5, "completions/mean_length": 92.94791793823242, "completions/min_length": 37.0, "epoch": 9.585256887565153, "grad_norm": 0.0024910444840047386, "kl": 0.27001953125, "learning_rate": 4.31624185798446e-09, "loss": 0.00026938767405226827, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6432, "train_speed(iter/s)": 0.026904 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.5, "completions/mean_length": 89.89583587646484, "completions/min_length": 32.5, "epoch": 9.5867460908414, "grad_norm": 0.002497991246515627, "kl": 0.283447265625, "learning_rate": 4.285289959716665e-09, "loss": 0.0002837218053173274, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6433, "train_speed(iter/s)": 0.026904 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 194.0, "completions/mean_length": 91.80208587646484, "completions/min_length": 40.0, "epoch": 9.588235294117647, "grad_norm": 0.0024895636173180406, "kl": 0.264404296875, "learning_rate": 4.254448962024815e-09, "loss": 0.00026417302433401346, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6434, "train_speed(iter/s)": 0.026906 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 269.0, "completions/mean_length": 104.45833778381348, "completions/min_length": 40.5, "epoch": 9.589724497393894, "grad_norm": 0.016241475478725124, "kl": 0.279296875, "learning_rate": 4.223718871808502e-09, "loss": 0.0002790524158626795, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6435, "train_speed(iter/s)": 0.026904 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 220.5, "completions/mean_length": 102.57291984558105, "completions/min_length": 45.25, "epoch": 9.591213700670142, "grad_norm": 0.0023596327707120267, "kl": 0.263427734375, "learning_rate": 4.193099695942615e-09, "loss": 0.00026293486007489264, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6436, "train_speed(iter/s)": 0.026905 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 225.75, "completions/mean_length": 91.68750381469727, "completions/min_length": 38.0, "epoch": 9.592702903946389, "grad_norm": 0.002556487058653513, "kl": 0.29248046875, "learning_rate": 4.162591441277341e-09, "loss": 0.00029273267136886716, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6437, "train_speed(iter/s)": 0.026906 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.25, "completions/mean_length": 93.56250190734863, "completions/min_length": 37.75, "epoch": 9.594192107222636, "grad_norm": 0.002760716099599491, "kl": 0.272705078125, "learning_rate": 4.132194114637833e-09, "loss": 0.00027218105969950557, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6438, "train_speed(iter/s)": 0.026906 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 254.25, "completions/mean_length": 95.43750190734863, "completions/min_length": 36.0, "epoch": 9.595681310498883, "grad_norm": 0.002552714888370537, "kl": 0.27880859375, "learning_rate": 4.101907722824538e-09, "loss": 0.0002789646969176829, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6439, "train_speed(iter/s)": 0.026904 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.5, "completions/mean_length": 97.12500190734863, "completions/min_length": 42.0, "epoch": 9.597170513775131, "grad_norm": 0.004545818915420268, "kl": 0.27978515625, "learning_rate": 4.071732272613148e-09, "loss": 0.0002796650514937937, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6440, "train_speed(iter/s)": 0.026906 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 257.25, "completions/mean_length": 105.83333587646484, "completions/min_length": 39.75, "epoch": 9.598659717051378, "grad_norm": 0.002250674001853772, "kl": 0.2490234375, "learning_rate": 4.041667770754431e-09, "loss": 0.00024883681908249855, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6441, "train_speed(iter/s)": 0.026906 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.0, "completions/mean_length": 89.11458778381348, "completions/min_length": 41.25, "epoch": 9.600148920327625, "grad_norm": 0.002387632123459808, "kl": 0.263671875, "learning_rate": 4.011714223974394e-09, "loss": 0.0002633197873365134, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6442, "train_speed(iter/s)": 0.026906 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 179.5, "completions/mean_length": 87.17708587646484, "completions/min_length": 36.5, "epoch": 9.601638123603871, "grad_norm": 0.0027586577283776207, "kl": 0.294921875, "learning_rate": 3.981871638974177e-09, "loss": 0.00029495026683434844, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6443, "train_speed(iter/s)": 0.026907 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.5, "completions/mean_length": 89.41666793823242, "completions/min_length": 36.25, "epoch": 9.60312732688012, "grad_norm": 0.002689445038913119, "kl": 0.283203125, "learning_rate": 3.9521400224301616e-09, "loss": 0.0002829373115673661, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6444, "train_speed(iter/s)": 0.026907 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.5, "completions/mean_length": 102.30208587646484, "completions/min_length": 37.0, "epoch": 9.604616530156367, "grad_norm": 0.0029128961651859633, "kl": 0.2744140625, "learning_rate": 3.922519380993805e-09, "loss": 0.0002745989477261901, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6445, "train_speed(iter/s)": 0.026907 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 272.75, "completions/mean_length": 116.46875381469727, "completions/min_length": 50.0, "epoch": 9.606105733432614, "grad_norm": 0.002556358565727739, "kl": 0.2197265625, "learning_rate": 3.8930097212918625e-09, "loss": 0.00021994014969095588, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6446, "train_speed(iter/s)": 0.026907 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 231.75, "completions/mean_length": 100.43750381469727, "completions/min_length": 31.75, "epoch": 9.60759493670886, "grad_norm": 0.7167053580462973, "kl": 0.265380859375, "learning_rate": 3.863611049926163e-09, "loss": -0.01343418937176466, "memory(GiB)": 112.53, "reward": 1.7395834028720856, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 0.9895833432674408, "rewards/Format/std": 0.05103103443980217, "step": 6447, "train_speed(iter/s)": 0.026907 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.0, "completions/mean_length": 93.58333587646484, "completions/min_length": 39.75, "epoch": 9.609084139985107, "grad_norm": 0.0026310533862306715, "kl": 0.2734375, "learning_rate": 3.834323373473669e-09, "loss": 0.00027336616767570376, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6448, "train_speed(iter/s)": 0.026906 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.75, "completions/mean_length": 79.89583587646484, "completions/min_length": 32.5, "epoch": 9.610573343261356, "grad_norm": 0.00261242188406271, "kl": 0.29931640625, "learning_rate": 3.805146698486694e-09, "loss": 0.0002991019864566624, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6449, "train_speed(iter/s)": 0.026908 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 248.75, "completions/mean_length": 94.51041984558105, "completions/min_length": 39.5, "epoch": 9.612062546537603, "grad_norm": 0.0031159098585409927, "kl": 0.283203125, "learning_rate": 3.776081031492462e-09, "loss": 0.00028319208649918437, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6450, "train_speed(iter/s)": 0.026908 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 229.25, "completions/mean_length": 100.70833396911621, "completions/min_length": 42.0, "epoch": 9.61355174981385, "grad_norm": 0.0023396450488388522, "kl": 0.25830078125, "learning_rate": 3.747126378993604e-09, "loss": 0.0002584283356554806, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6451, "train_speed(iter/s)": 0.026908 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.5, "completions/mean_length": 90.29166793823242, "completions/min_length": 42.5, "epoch": 9.615040953090096, "grad_norm": 0.00217533263174361, "kl": 0.273193359375, "learning_rate": 3.718282747467827e-09, "loss": 0.000272676203167066, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6452, "train_speed(iter/s)": 0.026909 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.75, "completions/mean_length": 87.21875190734863, "completions/min_length": 42.0, "epoch": 9.616530156366345, "grad_norm": 0.0025847563925176864, "kl": 0.2841796875, "learning_rate": 3.68955014336797e-09, "loss": 0.00028384599136188626, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6453, "train_speed(iter/s)": 0.026908 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 229.5, "completions/mean_length": 96.94791793823242, "completions/min_length": 44.75, "epoch": 9.618019359642592, "grad_norm": 0.004140964470281232, "kl": 0.26953125, "learning_rate": 3.660928573122002e-09, "loss": 0.000269299722276628, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6454, "train_speed(iter/s)": 0.026909 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.75, "completions/mean_length": 98.60416984558105, "completions/min_length": 43.0, "epoch": 9.619508562918838, "grad_norm": 0.552752287265688, "kl": 0.274169921875, "learning_rate": 3.6324180431330786e-09, "loss": -0.008451118133962154, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6455, "train_speed(iter/s)": 0.02691 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.5, "completions/mean_length": 84.35416984558105, "completions/min_length": 38.25, "epoch": 9.620997766195085, "grad_norm": 0.0036717716705320018, "kl": 0.2802734375, "learning_rate": 3.604018559779598e-09, "loss": 0.000280385633232072, "memory(GiB)": 112.53, "reward": 1.5000000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5000000149011612, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6456, "train_speed(iter/s)": 0.026911 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.5, "completions/mean_length": 93.58333778381348, "completions/min_length": 44.25, "epoch": 9.622486969471332, "grad_norm": 0.0027860502740256355, "kl": 0.27978515625, "learning_rate": 3.575730129415089e-09, "loss": 0.000279697822406888, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6457, "train_speed(iter/s)": 0.026911 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 256.0, "completions/mean_length": 100.55208587646484, "completions/min_length": 34.75, "epoch": 9.62397617274758, "grad_norm": 1.3314411352464295, "kl": 0.231201171875, "learning_rate": 3.5475527583680997e-09, "loss": 0.0033086459152400494, "memory(GiB)": 112.53, "reward": 1.9270833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9270833432674408, "rewards/CineAccuracyORM/std": 0.11607640981674194, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6458, "train_speed(iter/s)": 0.026913 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 272.5, "completions/mean_length": 106.79166984558105, "completions/min_length": 40.75, "epoch": 9.625465376023827, "grad_norm": 0.0022620377731707875, "kl": 0.24560546875, "learning_rate": 3.5194864529424774e-09, "loss": 0.0002455758803989738, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6459, "train_speed(iter/s)": 0.026914 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.25, "completions/mean_length": 100.90625190734863, "completions/min_length": 42.0, "epoch": 9.626954579300074, "grad_norm": 0.8419893815946826, "kl": 0.265625, "learning_rate": 3.4915312194171986e-09, "loss": -0.017830129712820053, "memory(GiB)": 112.53, "reward": 1.90625, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.90625, "rewards/CineAccuracyORM/std": 0.12363383919000626, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6460, "train_speed(iter/s)": 0.026914 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 255.0, "completions/mean_length": 88.51041793823242, "completions/min_length": 37.75, "epoch": 9.628443782576321, "grad_norm": 0.0035440588903400926, "kl": 0.293212890625, "learning_rate": 3.4636870640463167e-09, "loss": 0.0002936299715656787, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6461, "train_speed(iter/s)": 0.026914 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 202.5, "completions/mean_length": 91.87500190734863, "completions/min_length": 39.0, "epoch": 9.62993298585257, "grad_norm": 0.002278658158404642, "kl": 0.29345703125, "learning_rate": 3.435953993059126e-09, "loss": 0.00029327854281291366, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6462, "train_speed(iter/s)": 0.026915 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.75, "completions/mean_length": 87.68750381469727, "completions/min_length": 38.5, "epoch": 9.631422189128816, "grad_norm": 0.0022517001110417168, "kl": 0.27587890625, "learning_rate": 3.408332012659998e-09, "loss": 0.0002761615323834121, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6463, "train_speed(iter/s)": 0.026914 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 208.5, "completions/mean_length": 97.89583587646484, "completions/min_length": 46.0, "epoch": 9.632911392405063, "grad_norm": 0.8604451576811389, "kl": 0.2626953125, "learning_rate": 3.380821129028488e-09, "loss": 0.001112167607061565, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6464, "train_speed(iter/s)": 0.026916 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 254.75, "completions/mean_length": 95.61458587646484, "completions/min_length": 41.5, "epoch": 9.63440059568131, "grad_norm": 0.0023156418771392265, "kl": 0.2744140625, "learning_rate": 3.353421348319341e-09, "loss": 0.0002744394005276263, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6465, "train_speed(iter/s)": 0.026916 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 234.5, "completions/mean_length": 101.72916984558105, "completions/min_length": 42.0, "epoch": 9.635889798957558, "grad_norm": 0.0024620401719173306, "kl": 0.24755859375, "learning_rate": 3.3261326766623187e-09, "loss": 0.00024721890804357827, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6466, "train_speed(iter/s)": 0.026916 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 170.75, "completions/mean_length": 94.07291984558105, "completions/min_length": 52.0, "epoch": 9.637379002233805, "grad_norm": 0.002763781569837951, "kl": 0.26611328125, "learning_rate": 3.2989551201624832e-09, "loss": 0.000265600741840899, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6467, "train_speed(iter/s)": 0.026917 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 250.25, "completions/mean_length": 105.51042175292969, "completions/min_length": 40.75, "epoch": 9.638868205510052, "grad_norm": 0.0021219497539270537, "kl": 0.2490234375, "learning_rate": 3.2718886848998596e-09, "loss": 0.0002486113808117807, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6468, "train_speed(iter/s)": 0.026917 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 204.75, "completions/mean_length": 99.03125381469727, "completions/min_length": 42.5, "epoch": 9.640357408786299, "grad_norm": 0.0023998983776622935, "kl": 0.265625, "learning_rate": 3.244933376929826e-09, "loss": 0.00026590810739435256, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6469, "train_speed(iter/s)": 0.026917 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 166.75, "completions/mean_length": 76.04166984558105, "completions/min_length": 28.25, "epoch": 9.641846612062547, "grad_norm": 0.006710037901820343, "kl": 0.314453125, "learning_rate": 3.21808920228267e-09, "loss": 0.0003139699692837894, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6470, "train_speed(iter/s)": 0.026915 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.25, "completions/mean_length": 88.78125190734863, "completions/min_length": 39.25, "epoch": 9.643335815338794, "grad_norm": 0.002226809119125904, "kl": 0.264404296875, "learning_rate": 3.1913561669640322e-09, "loss": 0.0002645585627760738, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6471, "train_speed(iter/s)": 0.026916 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.25, "completions/mean_length": 96.67708587646484, "completions/min_length": 36.0, "epoch": 9.644825018615041, "grad_norm": 0.004382550921891425, "kl": 0.27685546875, "learning_rate": 3.164734276954517e-09, "loss": 0.00027689672424457967, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6472, "train_speed(iter/s)": 0.026916 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 258.25, "completions/mean_length": 97.20833587646484, "completions/min_length": 43.5, "epoch": 9.646314221891288, "grad_norm": 0.0027012021894776365, "kl": 0.2685546875, "learning_rate": 3.138223538209972e-09, "loss": 0.00026890303706750274, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6473, "train_speed(iter/s)": 0.026917 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.0, "completions/mean_length": 92.50000190734863, "completions/min_length": 42.25, "epoch": 9.647803425167535, "grad_norm": 0.0023907511662468214, "kl": 0.259765625, "learning_rate": 3.111823956661319e-09, "loss": 0.00025962028303183615, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6474, "train_speed(iter/s)": 0.026916 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 257.25, "completions/mean_length": 99.22916984558105, "completions/min_length": 31.5, "epoch": 9.649292628443783, "grad_norm": 0.002537475273689776, "kl": 0.27001953125, "learning_rate": 3.0855355382146676e-09, "loss": 0.00026983884163200855, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6475, "train_speed(iter/s)": 0.026916 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 259.25, "completions/mean_length": 104.12500190734863, "completions/min_length": 45.25, "epoch": 9.65078183172003, "grad_norm": 0.8157619046081214, "kl": 0.2705078125, "learning_rate": 3.0593582887512014e-09, "loss": 0.0024173217825591564, "memory(GiB)": 112.53, "reward": 1.6770833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.6770833507180214, "rewards/CineAccuracyORM/std": 0.3568481206893921, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6476, "train_speed(iter/s)": 0.026916 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.25, "completions/mean_length": 99.73958587646484, "completions/min_length": 34.25, "epoch": 9.652271034996277, "grad_norm": 0.0023137086194665057, "kl": 0.2578125, "learning_rate": 3.0332922141272364e-09, "loss": 0.00025787740014493465, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6477, "train_speed(iter/s)": 0.026915 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.5, "completions/mean_length": 94.71875190734863, "completions/min_length": 39.75, "epoch": 9.653760238272524, "grad_norm": 0.0022639443898534273, "kl": 0.256103515625, "learning_rate": 3.0073373201742747e-09, "loss": 0.0002563666785135865, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6478, "train_speed(iter/s)": 0.026914 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.0, "completions/mean_length": 97.79166793823242, "completions/min_length": 35.5, "epoch": 9.655249441548772, "grad_norm": 0.002406096293134027, "kl": 0.257080078125, "learning_rate": 2.9814936126988376e-09, "loss": 0.0002569410135038197, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6479, "train_speed(iter/s)": 0.026912 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 254.75, "completions/mean_length": 96.66666984558105, "completions/min_length": 41.75, "epoch": 9.656738644825019, "grad_norm": 0.00235034247845866, "kl": 0.28173828125, "learning_rate": 2.9557610974828006e-09, "loss": 0.0002812477177940309, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6480, "train_speed(iter/s)": 0.026912 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.0, "completions/mean_length": 93.47916793823242, "completions/min_length": 36.0, "epoch": 9.658227848101266, "grad_norm": 1.5046928040542662, "kl": 0.267822265625, "learning_rate": 2.9301397802828364e-09, "loss": 0.0037440876476466656, "memory(GiB)": 112.53, "reward": 1.96875, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.96875, "rewards/CineAccuracyORM/std": 0.08445799350738525, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6481, "train_speed(iter/s)": 0.026914 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 259.75, "completions/mean_length": 98.94791984558105, "completions/min_length": 41.5, "epoch": 9.659717051377513, "grad_norm": 0.0021787774151219986, "kl": 0.254638671875, "learning_rate": 2.904629666830971e-09, "loss": 0.00025468040257692337, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6482, "train_speed(iter/s)": 0.026913 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.5, "completions/mean_length": 89.46875190734863, "completions/min_length": 44.25, "epoch": 9.66120625465376, "grad_norm": 0.0022389366343551617, "kl": 0.28955078125, "learning_rate": 2.879230762834306e-09, "loss": 0.0002889298484660685, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6483, "train_speed(iter/s)": 0.026912 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 295.5, "completions/mean_length": 105.78125381469727, "completions/min_length": 43.25, "epoch": 9.662695457930008, "grad_norm": 0.0023119815099186185, "kl": 0.24267578125, "learning_rate": 2.853943073975018e-09, "loss": 0.0002425367129035294, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6484, "train_speed(iter/s)": 0.026912 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 249.5, "completions/mean_length": 104.20833587646484, "completions/min_length": 42.0, "epoch": 9.664184661206255, "grad_norm": 0.0023928176360229953, "kl": 0.268798828125, "learning_rate": 2.8287666059104707e-09, "loss": 0.00026857396005652845, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6485, "train_speed(iter/s)": 0.026911 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.5, "completions/mean_length": 100.02083587646484, "completions/min_length": 37.0, "epoch": 9.665673864482502, "grad_norm": 0.002390698288762171, "kl": 0.235595703125, "learning_rate": 2.803701364273048e-09, "loss": 0.00023525740834884346, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6486, "train_speed(iter/s)": 0.026911 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.0, "completions/mean_length": 98.71875, "completions/min_length": 42.5, "epoch": 9.667163067758748, "grad_norm": 0.002484835159566768, "kl": 0.247314453125, "learning_rate": 2.7787473546703744e-09, "loss": 0.0002469067112542689, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6487, "train_speed(iter/s)": 0.026911 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 208.0, "completions/mean_length": 93.94791984558105, "completions/min_length": 38.0, "epoch": 9.668652271034997, "grad_norm": 0.002725836567333119, "kl": 0.28076171875, "learning_rate": 2.753904582685096e-09, "loss": 0.0002808638964779675, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6488, "train_speed(iter/s)": 0.026913 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.0, "completions/mean_length": 97.62500190734863, "completions/min_length": 42.25, "epoch": 9.670141474311244, "grad_norm": 0.003022246478147605, "kl": 0.273681640625, "learning_rate": 2.729173053874989e-09, "loss": 0.00027342664543539286, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6489, "train_speed(iter/s)": 0.026911 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 233.25, "completions/mean_length": 99.68750381469727, "completions/min_length": 44.5, "epoch": 9.67163067758749, "grad_norm": 0.0023344776585096567, "kl": 0.26318359375, "learning_rate": 2.7045527737729613e-09, "loss": 0.000263228896073997, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6490, "train_speed(iter/s)": 0.02691 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 185.25, "completions/mean_length": 91.76041793823242, "completions/min_length": 40.5, "epoch": 9.673119880863737, "grad_norm": 1.1982317783028174, "kl": 0.27099609375, "learning_rate": 2.680043747887051e-09, "loss": -0.0043084933422505856, "memory(GiB)": 112.53, "reward": 1.8958333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6491, "train_speed(iter/s)": 0.02691 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 260.75, "completions/mean_length": 97.91666793823242, "completions/min_length": 40.0, "epoch": 9.674609084139986, "grad_norm": 1.082990161563133, "kl": 0.256591796875, "learning_rate": 2.655645981700372e-09, "loss": 0.04207072779536247, "memory(GiB)": 112.53, "reward": 1.8958333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6492, "train_speed(iter/s)": 0.026909 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 191.5, "completions/mean_length": 84.78125381469727, "completions/min_length": 40.0, "epoch": 9.676098287416233, "grad_norm": 0.0031244684819982512, "kl": 0.3037109375, "learning_rate": 2.6313594806711136e-09, "loss": 0.0003030269290320575, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6493, "train_speed(iter/s)": 0.026911 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 240.5, "completions/mean_length": 100.87500381469727, "completions/min_length": 39.75, "epoch": 9.67758749069248, "grad_norm": 0.0023452764866209027, "kl": 0.2451171875, "learning_rate": 2.6071842502326526e-09, "loss": 0.00024428428150713444, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6494, "train_speed(iter/s)": 0.026911 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.5, "completions/mean_length": 96.21875190734863, "completions/min_length": 46.25, "epoch": 9.679076693968726, "grad_norm": 0.0024761011124856103, "kl": 0.2783203125, "learning_rate": 2.5831202957934393e-09, "loss": 0.000278383115073666, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6495, "train_speed(iter/s)": 0.026911 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 284.5, "completions/mean_length": 103.07291984558105, "completions/min_length": 34.75, "epoch": 9.680565897244975, "grad_norm": 1.6448995004839904, "kl": 0.2412109375, "learning_rate": 2.5591676227370572e-09, "loss": 0.03372100368142128, "memory(GiB)": 112.53, "reward": 1.8020833730697632, "reward_std": 0.06803862191736698, "rewards/CineAccuracyORM/mean": 0.802083358168602, "rewards/CineAccuracyORM/std": 0.2973194234073162, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6496, "train_speed(iter/s)": 0.02691 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.0, "completions/mean_length": 87.60416793823242, "completions/min_length": 38.75, "epoch": 9.682055100521222, "grad_norm": 0.0025438476954846227, "kl": 0.27880859375, "learning_rate": 2.535326236422053e-09, "loss": 0.00027891836361959577, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6497, "train_speed(iter/s)": 0.026909 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 262.5, "completions/mean_length": 96.30208396911621, "completions/min_length": 42.25, "epoch": 9.683544303797468, "grad_norm": 0.003212758756249045, "kl": 0.2666015625, "learning_rate": 2.5115961421823263e-09, "loss": 0.0002669362002052367, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6498, "train_speed(iter/s)": 0.026909 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.25, "completions/mean_length": 87.34375381469727, "completions/min_length": 42.75, "epoch": 9.685033507073715, "grad_norm": 0.0024082493612431854, "kl": 0.28369140625, "learning_rate": 2.4879773453266304e-09, "loss": 0.00028380120056681335, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6499, "train_speed(iter/s)": 0.026907 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 206.5, "completions/mean_length": 89.46875190734863, "completions/min_length": 38.0, "epoch": 9.686522710349962, "grad_norm": 0.002328592807818913, "kl": 0.297119140625, "learning_rate": 2.4644698511390726e-09, "loss": 0.0002967810141853988, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6500, "train_speed(iter/s)": 0.026907 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 255.25, "completions/mean_length": 87.68750190734863, "completions/min_length": 36.25, "epoch": 9.68801191362621, "grad_norm": 0.002574876879342371, "kl": 0.3017578125, "learning_rate": 2.4410736648785015e-09, "loss": 0.00030144653283059597, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6501, "train_speed(iter/s)": 0.026902 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 189.5, "completions/mean_length": 88.38541793823242, "completions/min_length": 43.5, "epoch": 9.689501116902457, "grad_norm": 1.2042596746987604, "kl": 0.2822265625, "learning_rate": 2.4177887917792295e-09, "loss": -0.004313468001782894, "memory(GiB)": 112.53, "reward": 1.6041667461395264, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.6041666865348816, "rewards/CineAccuracyORM/std": 0.4870600998401642, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6502, "train_speed(iter/s)": 0.026903 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.0, "completions/mean_length": 92.38541793823242, "completions/min_length": 35.75, "epoch": 9.690990320178704, "grad_norm": 0.0025747161881584948, "kl": 0.27978515625, "learning_rate": 2.3946152370505345e-09, "loss": 0.0002799070789478719, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6503, "train_speed(iter/s)": 0.026903 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 238.25, "completions/mean_length": 94.41666984558105, "completions/min_length": 44.25, "epoch": 9.692479523454951, "grad_norm": 0.005601788523017396, "kl": 0.2744140625, "learning_rate": 2.371553005876603e-09, "loss": 0.00027434766525402665, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6504, "train_speed(iter/s)": 0.026901 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 250.0, "completions/mean_length": 99.53125190734863, "completions/min_length": 44.25, "epoch": 9.6939687267312, "grad_norm": 0.8973187745074077, "kl": 0.27880859375, "learning_rate": 2.3486021034170854e-09, "loss": -0.014214429073035717, "memory(GiB)": 112.53, "reward": 1.8958333730697632, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8958333432674408, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6505, "train_speed(iter/s)": 0.026901 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 184.5, "completions/mean_length": 94.15625190734863, "completions/min_length": 45.0, "epoch": 9.695457930007446, "grad_norm": 0.002687366188821209, "kl": 0.28076171875, "learning_rate": 2.32576253480643e-09, "loss": 0.00028085921076126397, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6506, "train_speed(iter/s)": 0.026901 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 241.0, "completions/mean_length": 102.06250381469727, "completions/min_length": 37.25, "epoch": 9.696947133283693, "grad_norm": 0.002581508583541805, "kl": 0.2666015625, "learning_rate": 2.3030343051542168e-09, "loss": 0.0002661900653038174, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6507, "train_speed(iter/s)": 0.026901 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 233.0, "completions/mean_length": 96.94791793823242, "completions/min_length": 35.25, "epoch": 9.69843633655994, "grad_norm": 0.0021777215993873302, "kl": 0.275146484375, "learning_rate": 2.2804174195452665e-09, "loss": 0.00027490209322422743, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6508, "train_speed(iter/s)": 0.026901 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.75, "completions/mean_length": 89.01041793823242, "completions/min_length": 38.5, "epoch": 9.699925539836187, "grad_norm": 0.002523809261962965, "kl": 0.286376953125, "learning_rate": 2.257911883039365e-09, "loss": 0.0002865855349227786, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6509, "train_speed(iter/s)": 0.026902 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.75, "completions/mean_length": 100.98958587646484, "completions/min_length": 33.75, "epoch": 9.701414743112435, "grad_norm": 0.7267507770254791, "kl": 0.3310546875, "learning_rate": 2.2355177006714298e-09, "loss": -0.0052694398909807205, "memory(GiB)": 112.53, "reward": 1.7812500298023224, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.7812500074505806, "rewards/CineAccuracyORM/std": 0.22409863770008087, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6510, "train_speed(iter/s)": 0.026902 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 220.0, "completions/mean_length": 88.41666793823242, "completions/min_length": 35.0, "epoch": 9.702903946388682, "grad_norm": 0.002271158482735715, "kl": 0.2705078125, "learning_rate": 2.2132348774513974e-09, "loss": 0.00026994143263436854, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6511, "train_speed(iter/s)": 0.026901 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 322.5, "completions/mean_length": 104.17708587646484, "completions/min_length": 41.5, "epoch": 9.704393149664929, "grad_norm": 0.0028634353297328713, "kl": 0.25048828125, "learning_rate": 2.1910634183644472e-09, "loss": 0.00025009887758642435, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6512, "train_speed(iter/s)": 0.026901 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 267.25, "completions/mean_length": 99.14583778381348, "completions/min_length": 32.5, "epoch": 9.705882352941176, "grad_norm": 0.0026178289086362205, "kl": 0.257080078125, "learning_rate": 2.169003328370611e-09, "loss": 0.00025658681988716125, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6513, "train_speed(iter/s)": 0.026902 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 183.75, "completions/mean_length": 91.69791984558105, "completions/min_length": 31.25, "epoch": 9.707371556217424, "grad_norm": 0.0022400579122385765, "kl": 0.258056640625, "learning_rate": 2.1470546124052746e-09, "loss": 0.0002575478283688426, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6514, "train_speed(iter/s)": 0.026903 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 255.5, "completions/mean_length": 94.97916984558105, "completions/min_length": 40.5, "epoch": 9.708860759493671, "grad_norm": 0.0028738145784006986, "kl": 0.271728515625, "learning_rate": 2.1252172753787323e-09, "loss": 0.0002715941518545151, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6515, "train_speed(iter/s)": 0.026901 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 214.5, "completions/mean_length": 108.73958587646484, "completions/min_length": 42.75, "epoch": 9.710349962769918, "grad_norm": 0.0024275810527099664, "kl": 0.240234375, "learning_rate": 2.1034913221762984e-09, "loss": 0.00024034440866671503, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6516, "train_speed(iter/s)": 0.026901 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 167.25, "completions/mean_length": 83.64583587646484, "completions/min_length": 42.25, "epoch": 9.711839166046165, "grad_norm": 0.003379367972695771, "kl": 0.28271484375, "learning_rate": 2.0818767576586403e-09, "loss": 0.0002828836441040039, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6517, "train_speed(iter/s)": 0.026901 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 267.25, "completions/mean_length": 102.11458587646484, "completions/min_length": 39.5, "epoch": 9.713328369322413, "grad_norm": 0.008653370940358219, "kl": 0.26904296875, "learning_rate": 2.0603735866612237e-09, "loss": 0.0002686922380235046, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6518, "train_speed(iter/s)": 0.026901 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 208.25, "completions/mean_length": 96.09375190734863, "completions/min_length": 42.5, "epoch": 9.71481757259866, "grad_norm": 0.002574978141245532, "kl": 0.27685546875, "learning_rate": 2.0389818139947558e-09, "loss": 0.00027661651256494224, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6519, "train_speed(iter/s)": 0.026902 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 271.75, "completions/mean_length": 100.80208396911621, "completions/min_length": 40.25, "epoch": 9.716306775874907, "grad_norm": 0.705149683849258, "kl": 0.266357421875, "learning_rate": 2.0177014444449638e-09, "loss": 0.03896671161055565, "memory(GiB)": 112.53, "reward": 1.7395833730697632, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833432674408, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6520, "train_speed(iter/s)": 0.026902 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 344.0, "completions/mean_length": 106.88541984558105, "completions/min_length": 40.5, "epoch": 9.717795979151154, "grad_norm": 0.002370257818043924, "kl": 0.271240234375, "learning_rate": 1.996532482772595e-09, "loss": 0.00027117040008306503, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6521, "train_speed(iter/s)": 0.0269 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 195.5, "completions/mean_length": 88.31250190734863, "completions/min_length": 41.25, "epoch": 9.719285182427402, "grad_norm": 0.0036732540261530935, "kl": 0.287109375, "learning_rate": 1.9754749337136946e-09, "loss": 0.0002869553864002228, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6522, "train_speed(iter/s)": 0.026899 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.0, "completions/mean_length": 90.47916984558105, "completions/min_length": 38.25, "epoch": 9.720774385703649, "grad_norm": 0.0027609822057619643, "kl": 0.26513671875, "learning_rate": 1.9545288019790494e-09, "loss": 0.0002648493682499975, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6523, "train_speed(iter/s)": 0.026899 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 245.75, "completions/mean_length": 93.23958396911621, "completions/min_length": 40.0, "epoch": 9.722263588979896, "grad_norm": 0.002634750428551435, "kl": 0.271484375, "learning_rate": 1.9336940922548005e-09, "loss": 0.00027113681426271796, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6524, "train_speed(iter/s)": 0.0269 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.0, "completions/mean_length": 93.64583778381348, "completions/min_length": 33.0, "epoch": 9.723752792256143, "grad_norm": 0.002725827689872691, "kl": 0.279052734375, "learning_rate": 1.9129708092020523e-09, "loss": 0.00027930206852033734, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6525, "train_speed(iter/s)": 0.026901 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 208.0, "completions/mean_length": 98.43750381469727, "completions/min_length": 43.0, "epoch": 9.72524199553239, "grad_norm": 0.002479952319870046, "kl": 0.255859375, "learning_rate": 1.8923589574569298e-09, "loss": 0.00025571256992407143, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6526, "train_speed(iter/s)": 0.0269 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.75, "completions/mean_length": 97.75000190734863, "completions/min_length": 36.75, "epoch": 9.726731198808638, "grad_norm": 0.002495174628439958, "kl": 0.265869140625, "learning_rate": 1.8718585416307443e-09, "loss": 0.00026584311854094267, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6527, "train_speed(iter/s)": 0.0269 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.75, "completions/mean_length": 97.35416984558105, "completions/min_length": 44.75, "epoch": 9.728220402084885, "grad_norm": 0.002586941729619801, "kl": 0.251953125, "learning_rate": 1.8514695663098267e-09, "loss": 0.00025192726752720773, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6528, "train_speed(iter/s)": 0.026902 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 253.75, "completions/mean_length": 99.20833587646484, "completions/min_length": 41.0, "epoch": 9.729709605361132, "grad_norm": 0.0023216583312411945, "kl": 0.27783203125, "learning_rate": 1.8311920360555288e-09, "loss": 0.0002774252789095044, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6529, "train_speed(iter/s)": 0.026901 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.0, "completions/mean_length": 103.46875190734863, "completions/min_length": 37.5, "epoch": 9.731198808637378, "grad_norm": 2.0989313944732397, "kl": 0.257080078125, "learning_rate": 1.8110259554043327e-09, "loss": 0.007880659773945808, "memory(GiB)": 112.53, "reward": 1.6458334028720856, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.645833358168602, "rewards/CineAccuracyORM/std": 0.4870600998401642, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6530, "train_speed(iter/s)": 0.026901 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.75, "completions/mean_length": 91.47916793823242, "completions/min_length": 37.75, "epoch": 9.732688011913627, "grad_norm": 0.0028932876780807383, "kl": 0.27978515625, "learning_rate": 1.790971328867741e-09, "loss": 0.0002799202920868993, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6531, "train_speed(iter/s)": 0.026902 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.0, "completions/mean_length": 94.32291984558105, "completions/min_length": 48.5, "epoch": 9.734177215189874, "grad_norm": 0.8162047195085057, "kl": 0.258056640625, "learning_rate": 1.7710281609323863e-09, "loss": 0.011446270160377026, "memory(GiB)": 112.53, "reward": 1.9270833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9270833432674408, "rewards/CineAccuracyORM/std": 0.11607640981674194, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6532, "train_speed(iter/s)": 0.026901 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 228.75, "completions/mean_length": 103.46875381469727, "completions/min_length": 39.75, "epoch": 9.73566641846612, "grad_norm": 0.0023224706821452588, "kl": 0.255859375, "learning_rate": 1.7511964560598669e-09, "loss": 0.00025602817186154425, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6533, "train_speed(iter/s)": 0.026903 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.75, "completions/mean_length": 90.42708778381348, "completions/min_length": 38.25, "epoch": 9.737155621742367, "grad_norm": 0.7006215755844696, "kl": 0.29248046875, "learning_rate": 1.731476218686967e-09, "loss": 0.01797098107635975, "memory(GiB)": 112.53, "reward": 1.9375, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.9375, "rewards/CineAccuracyORM/std": 0.11058146506547928, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6534, "train_speed(iter/s)": 0.026903 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 307.25, "completions/mean_length": 106.09375381469727, "completions/min_length": 42.25, "epoch": 9.738644825018614, "grad_norm": 0.003959327771660205, "kl": 0.24609375, "learning_rate": 1.711867453225435e-09, "loss": 0.0002461309195496142, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6535, "train_speed(iter/s)": 0.026901 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.5, "completions/mean_length": 83.32291793823242, "completions/min_length": 38.5, "epoch": 9.740134028294863, "grad_norm": 0.0028584033220009973, "kl": 0.28857421875, "learning_rate": 1.6923701640621513e-09, "loss": 0.00028870918322354555, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6536, "train_speed(iter/s)": 0.0269 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.75, "completions/mean_length": 101.67708587646484, "completions/min_length": 47.5, "epoch": 9.74162323157111, "grad_norm": 0.8458606224640601, "kl": 0.217041015625, "learning_rate": 1.6729843555589597e-09, "loss": -0.01521118264645338, "memory(GiB)": 112.53, "reward": 1.7604166865348816, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7604166716337204, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6537, "train_speed(iter/s)": 0.026901 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.25, "completions/mean_length": 93.09375190734863, "completions/min_length": 37.5, "epoch": 9.743112434847356, "grad_norm": 0.0034700707786653696, "kl": 0.28173828125, "learning_rate": 1.6537100320528906e-09, "loss": 0.000281500251730904, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6538, "train_speed(iter/s)": 0.026898 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 257.0, "completions/mean_length": 104.43750190734863, "completions/min_length": 48.5, "epoch": 9.744601638123603, "grad_norm": 0.0022337609656199985, "kl": 0.2421875, "learning_rate": 1.6345471978558844e-09, "loss": 0.00024231830320786685, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6539, "train_speed(iter/s)": 0.026898 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.0, "completions/mean_length": 96.93750381469727, "completions/min_length": 37.75, "epoch": 9.746090841399852, "grad_norm": 0.002400385134728144, "kl": 0.266357421875, "learning_rate": 1.615495857255067e-09, "loss": 0.0002666684740688652, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6540, "train_speed(iter/s)": 0.026897 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 179.5, "completions/mean_length": 88.71875381469727, "completions/min_length": 37.75, "epoch": 9.747580044676099, "grad_norm": 0.002754678104951587, "kl": 0.28759765625, "learning_rate": 1.5965560145125845e-09, "loss": 0.00028748519252985716, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6541, "train_speed(iter/s)": 0.026898 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 192.25, "completions/mean_length": 86.27083587646484, "completions/min_length": 40.0, "epoch": 9.749069247952345, "grad_norm": 0.002498216493320616, "kl": 0.2861328125, "learning_rate": 1.5777276738656587e-09, "loss": 0.0002859078231267631, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6542, "train_speed(iter/s)": 0.026897 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 237.0, "completions/mean_length": 95.52083587646484, "completions/min_length": 37.75, "epoch": 9.750558451228592, "grad_norm": 0.0025101087833462113, "kl": 0.2744140625, "learning_rate": 1.5590108395265311e-09, "loss": 0.0002744211524259299, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6543, "train_speed(iter/s)": 0.026898 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.0, "completions/mean_length": 93.15625190734863, "completions/min_length": 41.25, "epoch": 9.75204765450484, "grad_norm": 0.0023526859919052417, "kl": 0.26953125, "learning_rate": 1.5404055156824081e-09, "loss": 0.0002694421273190528, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6544, "train_speed(iter/s)": 0.0269 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 184.0, "completions/mean_length": 87.12500190734863, "completions/min_length": 30.25, "epoch": 9.753536857781087, "grad_norm": 0.0023044409613714196, "kl": 0.28466796875, "learning_rate": 1.5219117064957932e-09, "loss": 0.00028430818929336965, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6545, "train_speed(iter/s)": 0.026901 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 233.5, "completions/mean_length": 93.22916984558105, "completions/min_length": 35.25, "epoch": 9.755026061057334, "grad_norm": 0.002265651050309885, "kl": 0.28466796875, "learning_rate": 1.503529416103988e-09, "loss": 0.0002846331335604191, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6546, "train_speed(iter/s)": 0.026901 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.75, "completions/mean_length": 97.42708587646484, "completions/min_length": 37.5, "epoch": 9.756515264333581, "grad_norm": 0.0023944803471311915, "kl": 0.25634765625, "learning_rate": 1.4852586486194807e-09, "loss": 0.0002561499713920057, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6547, "train_speed(iter/s)": 0.026901 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.5, "completions/mean_length": 89.28125381469727, "completions/min_length": 40.75, "epoch": 9.75800446760983, "grad_norm": 0.17454338740687414, "kl": 0.96484375, "learning_rate": 1.4670994081297795e-09, "loss": 0.0009641963988542557, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6548, "train_speed(iter/s)": 0.026901 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 222.5, "completions/mean_length": 94.86458587646484, "completions/min_length": 38.75, "epoch": 9.759493670886076, "grad_norm": 0.0024317074146769937, "kl": 0.26904296875, "learning_rate": 1.4490516986974676e-09, "loss": 0.00026912346947938204, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6549, "train_speed(iter/s)": 0.026899 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 259.0, "completions/mean_length": 99.63541793823242, "completions/min_length": 42.0, "epoch": 9.760982874162323, "grad_norm": 0.0023661813362131036, "kl": 0.2587890625, "learning_rate": 1.4311155243600936e-09, "loss": 0.00025891567929647863, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6550, "train_speed(iter/s)": 0.026898 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 208.0, "completions/mean_length": 95.38541793823242, "completions/min_length": 35.25, "epoch": 9.76247207743857, "grad_norm": 0.002516329680486478, "kl": 0.277099609375, "learning_rate": 1.4132908891303919e-09, "loss": 0.0002764478849712759, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6551, "train_speed(iter/s)": 0.026899 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 220.25, "completions/mean_length": 95.87500190734863, "completions/min_length": 37.75, "epoch": 9.763961280714817, "grad_norm": 0.0022460401863569463, "kl": 0.26611328125, "learning_rate": 1.395577796996006e-09, "loss": 0.0002662197221070528, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6552, "train_speed(iter/s)": 0.026899 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 194.75, "completions/mean_length": 82.61458587646484, "completions/min_length": 34.75, "epoch": 9.765450483991065, "grad_norm": 0.002798908212018321, "kl": 0.30517578125, "learning_rate": 1.3779762519197103e-09, "loss": 0.0003053944674320519, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6553, "train_speed(iter/s)": 0.0269 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.25, "completions/mean_length": 92.69791984558105, "completions/min_length": 38.5, "epoch": 9.766939687267312, "grad_norm": 0.0024978668052035107, "kl": 0.2685546875, "learning_rate": 1.3604862578392994e-09, "loss": 0.0002688877866603434, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6554, "train_speed(iter/s)": 0.026899 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.0, "completions/mean_length": 98.46875381469727, "completions/min_length": 43.5, "epoch": 9.768428890543559, "grad_norm": 0.0024745846905234667, "kl": 0.25927734375, "learning_rate": 1.3431078186675882e-09, "loss": 0.0002590667281765491, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6555, "train_speed(iter/s)": 0.026897 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 254.0, "completions/mean_length": 109.28125381469727, "completions/min_length": 37.0, "epoch": 9.769918093819806, "grad_norm": 0.0022191617296573067, "kl": 0.262939453125, "learning_rate": 1.3258409382924663e-09, "loss": 0.0002628232759889215, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6556, "train_speed(iter/s)": 0.026896 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.25, "completions/mean_length": 96.35416984558105, "completions/min_length": 42.5, "epoch": 9.771407297096054, "grad_norm": 0.002403366546366628, "kl": 0.26123046875, "learning_rate": 1.3086856205768438e-09, "loss": 0.00026064662961289287, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6557, "train_speed(iter/s)": 0.026898 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 196.25, "completions/mean_length": 92.90625381469727, "completions/min_length": 37.0, "epoch": 9.772896500372301, "grad_norm": 0.002386546504110066, "kl": 0.26416015625, "learning_rate": 1.291641869358706e-09, "loss": 0.0002644056803546846, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6558, "train_speed(iter/s)": 0.026898 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 197.0, "completions/mean_length": 92.62500381469727, "completions/min_length": 41.5, "epoch": 9.774385703648548, "grad_norm": 0.00420913307525064, "kl": 0.27587890625, "learning_rate": 1.2747096884510033e-09, "loss": 0.0002756441244855523, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6559, "train_speed(iter/s)": 0.026898 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 232.75, "completions/mean_length": 101.10417175292969, "completions/min_length": 34.5, "epoch": 9.775874906924795, "grad_norm": 0.0021732809462000693, "kl": 0.254150390625, "learning_rate": 1.257889081641872e-09, "loss": 0.0002546057803556323, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6560, "train_speed(iter/s)": 0.026898 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 234.0, "completions/mean_length": 96.14583587646484, "completions/min_length": 36.25, "epoch": 9.777364110201042, "grad_norm": 0.00222833267366619, "kl": 0.26904296875, "learning_rate": 1.2411800526943016e-09, "loss": 0.0002690888359211385, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6561, "train_speed(iter/s)": 0.026898 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.0, "completions/mean_length": 97.86458587646484, "completions/min_length": 43.0, "epoch": 9.77885331347729, "grad_norm": 0.0022997331584997138, "kl": 0.28369140625, "learning_rate": 1.2245826053465246e-09, "loss": 0.00028391293017193675, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6562, "train_speed(iter/s)": 0.026899 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.0, "completions/mean_length": 86.51041984558105, "completions/min_length": 39.25, "epoch": 9.780342516753537, "grad_norm": 0.002510111136856436, "kl": 0.2998046875, "learning_rate": 1.2080967433115708e-09, "loss": 0.00029935879865661263, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6563, "train_speed(iter/s)": 0.026899 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 263.5, "completions/mean_length": 100.38541984558105, "completions/min_length": 37.25, "epoch": 9.781831720029784, "grad_norm": 0.0022405418625281283, "kl": 0.262451171875, "learning_rate": 1.1917224702776563e-09, "loss": 0.0002624722837936133, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6564, "train_speed(iter/s)": 0.026899 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 262.25, "completions/mean_length": 112.09375381469727, "completions/min_length": 46.75, "epoch": 9.78332092330603, "grad_norm": 0.0022452655658459827, "kl": 0.220703125, "learning_rate": 1.1754597899081287e-09, "loss": 0.00022065915982238948, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6565, "train_speed(iter/s)": 0.0269 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 287.25, "completions/mean_length": 97.95833587646484, "completions/min_length": 43.25, "epoch": 9.784810126582279, "grad_norm": 0.002656253735828919, "kl": 0.259521484375, "learning_rate": 1.1593087058410778e-09, "loss": 0.0002595540718175471, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6566, "train_speed(iter/s)": 0.0269 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 312.25, "completions/mean_length": 103.21875190734863, "completions/min_length": 38.25, "epoch": 9.786299329858526, "grad_norm": 0.002230534424590627, "kl": 0.260986328125, "learning_rate": 1.1432692216899465e-09, "loss": 0.0002605150220915675, "memory(GiB)": 112.53, "reward": 1.5833334028720856, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333507180214, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6567, "train_speed(iter/s)": 0.0269 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 202.25, "completions/mean_length": 89.62500190734863, "completions/min_length": 33.75, "epoch": 9.787788533134773, "grad_norm": 0.00246310267378936, "kl": 0.27783203125, "learning_rate": 1.1273413410429754e-09, "loss": 0.000277779035968706, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6568, "train_speed(iter/s)": 0.026898 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 237.25, "completions/mean_length": 102.62500381469727, "completions/min_length": 44.75, "epoch": 9.78927773641102, "grad_norm": 1.9938373444512227, "kl": 0.24951171875, "learning_rate": 1.1115250674635368e-09, "loss": -0.0033920069690793753, "memory(GiB)": 112.53, "reward": 1.9791666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.9791666716337204, "rewards/CineAccuracyORM/std": 0.07058246433734894, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6569, "train_speed(iter/s)": 0.026899 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 246.5, "completions/mean_length": 99.92708587646484, "completions/min_length": 45.0, "epoch": 9.790766939687268, "grad_norm": 0.002284348833000106, "kl": 0.25732421875, "learning_rate": 1.0958204044900776e-09, "loss": 0.0002566242474131286, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6570, "train_speed(iter/s)": 0.026899 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.0, "completions/mean_length": 97.63541984558105, "completions/min_length": 41.25, "epoch": 9.792256142963515, "grad_norm": 0.6602134969333732, "kl": 0.31884765625, "learning_rate": 1.0802273556359542e-09, "loss": 0.01358803641051054, "memory(GiB)": 112.53, "reward": 1.8541666865348816, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8541666716337204, "rewards/CineAccuracyORM/std": 0.12590253353118896, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6571, "train_speed(iter/s)": 0.0269 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 209.25, "completions/mean_length": 90.98958587646484, "completions/min_length": 35.75, "epoch": 9.793745346239762, "grad_norm": 0.003393765545314365, "kl": 0.286865234375, "learning_rate": 1.0647459243897095e-09, "loss": 0.0002867368748411536, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6572, "train_speed(iter/s)": 0.026899 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 272.0, "completions/mean_length": 103.25000381469727, "completions/min_length": 43.75, "epoch": 9.795234549516008, "grad_norm": 0.002367956454125158, "kl": 0.251953125, "learning_rate": 1.0493761142147396e-09, "loss": 0.00025217418442480266, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6573, "train_speed(iter/s)": 0.026901 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 223.5, "completions/mean_length": 97.22916984558105, "completions/min_length": 42.0, "epoch": 9.796723752792257, "grad_norm": 0.002486190734640312, "kl": 0.27099609375, "learning_rate": 1.0341179285496271e-09, "loss": 0.00027053087251260877, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6574, "train_speed(iter/s)": 0.026899 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 225.25, "completions/mean_length": 99.94791984558105, "completions/min_length": 36.25, "epoch": 9.798212956068504, "grad_norm": 0.0026333207725009696, "kl": 0.2646484375, "learning_rate": 1.0189713708078084e-09, "loss": 0.0002644717169459909, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6575, "train_speed(iter/s)": 0.026898 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 182.0, "completions/mean_length": 88.04166984558105, "completions/min_length": 38.75, "epoch": 9.79970215934475, "grad_norm": 0.0029410796374773865, "kl": 0.271728515625, "learning_rate": 1.003936444377962e-09, "loss": 0.00027205044170841575, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6576, "train_speed(iter/s)": 0.026897 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 225.0, "completions/mean_length": 93.52083587646484, "completions/min_length": 38.75, "epoch": 9.801191362620997, "grad_norm": 0.002465362900976202, "kl": 0.269287109375, "learning_rate": 9.89013152623619e-10, "loss": 0.0002690118853934109, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6577, "train_speed(iter/s)": 0.026898 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 199.75, "completions/mean_length": 91.79166793823242, "completions/min_length": 30.0, "epoch": 9.802680565897244, "grad_norm": 0.0024739406171840127, "kl": 0.28369140625, "learning_rate": 9.742014988833869e-10, "loss": 0.00028361857403069735, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6578, "train_speed(iter/s)": 0.026898 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.5, "completions/mean_length": 98.37500190734863, "completions/min_length": 41.0, "epoch": 9.804169769173493, "grad_norm": 0.004675266824969545, "kl": 0.2734375, "learning_rate": 9.595014864709483e-10, "loss": 0.0002732118882704526, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6579, "train_speed(iter/s)": 0.026896 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.25, "completions/mean_length": 93.02083587646484, "completions/min_length": 41.25, "epoch": 9.80565897244974, "grad_norm": 0.002419064430615203, "kl": 0.27099609375, "learning_rate": 9.449131186749504e-10, "loss": 0.0002707410021685064, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6580, "train_speed(iter/s)": 0.026896 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 250.0, "completions/mean_length": 87.30208396911621, "completions/min_length": 39.75, "epoch": 9.807148175725986, "grad_norm": 0.0024145751276755875, "kl": 0.28271484375, "learning_rate": 9.304363987591157e-10, "loss": 0.00028265605214983225, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6581, "train_speed(iter/s)": 0.026895 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 258.25, "completions/mean_length": 99.65625190734863, "completions/min_length": 41.0, "epoch": 9.808637379002233, "grad_norm": 0.00233195904128171, "kl": 0.263427734375, "learning_rate": 9.160713299620759e-10, "loss": 0.00026362776407040656, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6582, "train_speed(iter/s)": 0.026895 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 179.25, "completions/mean_length": 89.35416793823242, "completions/min_length": 43.0, "epoch": 9.810126582278482, "grad_norm": 0.0024846190937004497, "kl": 0.28076171875, "learning_rate": 9.018179154975936e-10, "loss": 0.00028083051438443363, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6583, "train_speed(iter/s)": 0.026895 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 316.0, "completions/mean_length": 102.77083396911621, "completions/min_length": 37.25, "epoch": 9.811615785554729, "grad_norm": 0.002452541320258115, "kl": 0.2802734375, "learning_rate": 8.876761585545068e-10, "loss": 0.0002803094976115972, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6584, "train_speed(iter/s)": 0.026894 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.25, "completions/mean_length": 86.39583587646484, "completions/min_length": 42.5, "epoch": 9.813104988830975, "grad_norm": 0.002133673778608744, "kl": 0.287353515625, "learning_rate": 8.736460622964514e-10, "loss": 0.00028757768450304866, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6585, "train_speed(iter/s)": 0.026894 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 258.5, "completions/mean_length": 91.34375381469727, "completions/min_length": 40.75, "epoch": 9.814594192107222, "grad_norm": 0.002380406837458452, "kl": 0.27783203125, "learning_rate": 8.597276298623613e-10, "loss": 0.000277858431218192, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6586, "train_speed(iter/s)": 0.026894 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.0, "completions/mean_length": 90.85416984558105, "completions/min_length": 36.25, "epoch": 9.816083395383469, "grad_norm": 0.0022207076504117272, "kl": 0.2744140625, "learning_rate": 8.459208643659121e-10, "loss": 0.0002747745020315051, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6587, "train_speed(iter/s)": 0.026896 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 190.5, "completions/mean_length": 94.21875381469727, "completions/min_length": 38.0, "epoch": 9.817572598659718, "grad_norm": 0.002382447288380035, "kl": 0.27587890625, "learning_rate": 8.322257688960221e-10, "loss": 0.0002759267808869481, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6588, "train_speed(iter/s)": 0.026894 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.0, "completions/mean_length": 97.29166793823242, "completions/min_length": 40.0, "epoch": 9.819061801935964, "grad_norm": 0.002343394097526355, "kl": 0.258056640625, "learning_rate": 8.186423465164626e-10, "loss": 0.00025782070588320494, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6589, "train_speed(iter/s)": 0.026893 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 225.0, "completions/mean_length": 103.63542175292969, "completions/min_length": 44.0, "epoch": 9.820551005212211, "grad_norm": 0.002110432324630938, "kl": 0.267822265625, "learning_rate": 8.051706002661918e-10, "loss": 0.00026801732019521296, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6590, "train_speed(iter/s)": 0.026892 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 255.0, "completions/mean_length": 102.00000381469727, "completions/min_length": 41.0, "epoch": 9.822040208488458, "grad_norm": 0.0021387202022712533, "kl": 0.26025390625, "learning_rate": 7.918105331590763e-10, "loss": 0.0002600589068606496, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6591, "train_speed(iter/s)": 0.02689 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.5, "completions/mean_length": 85.53125381469727, "completions/min_length": 35.5, "epoch": 9.823529411764707, "grad_norm": 0.002566007494044841, "kl": 0.2939453125, "learning_rate": 7.785621481838922e-10, "loss": 0.00029366841772571206, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6592, "train_speed(iter/s)": 0.026891 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 181.5, "completions/mean_length": 86.66666984558105, "completions/min_length": 33.0, "epoch": 9.825018615040953, "grad_norm": 0.0030721701753120036, "kl": 0.2744140625, "learning_rate": 7.654254483047129e-10, "loss": 0.0002744252560660243, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6593, "train_speed(iter/s)": 0.026891 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.5, "completions/mean_length": 105.95833396911621, "completions/min_length": 47.25, "epoch": 9.8265078183172, "grad_norm": 0.0024150139041461166, "kl": 0.250732421875, "learning_rate": 7.524004364603543e-10, "loss": 0.00025079515762627125, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6594, "train_speed(iter/s)": 0.02689 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 235.0, "completions/mean_length": 100.62500190734863, "completions/min_length": 42.25, "epoch": 9.827997021593447, "grad_norm": 0.0044735600343783245, "kl": 0.26220703125, "learning_rate": 7.394871155647631e-10, "loss": 0.00026229466311633587, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6595, "train_speed(iter/s)": 0.026889 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.5, "completions/mean_length": 87.93750190734863, "completions/min_length": 38.5, "epoch": 9.829486224869695, "grad_norm": 0.002470176486244503, "kl": 0.291015625, "learning_rate": 7.266854885069618e-10, "loss": 0.00029112352058291435, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6596, "train_speed(iter/s)": 0.026888 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 214.0, "completions/mean_length": 87.76041984558105, "completions/min_length": 37.5, "epoch": 9.830975428145942, "grad_norm": 0.0024079277512431964, "kl": 0.2861328125, "learning_rate": 7.139955581508261e-10, "loss": 0.00028659868985414505, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6597, "train_speed(iter/s)": 0.026887 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.75, "completions/mean_length": 93.85416793823242, "completions/min_length": 37.0, "epoch": 9.832464631422189, "grad_norm": 0.0024088865927439002, "kl": 0.2548828125, "learning_rate": 7.014173273353629e-10, "loss": 0.0002550644858274609, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6598, "train_speed(iter/s)": 0.026887 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 265.0, "completions/mean_length": 107.94791793823242, "completions/min_length": 41.75, "epoch": 9.833953834698436, "grad_norm": 0.00220401491476658, "kl": 0.245849609375, "learning_rate": 6.889507988745436e-10, "loss": 0.00024560143356211483, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6599, "train_speed(iter/s)": 0.026886 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 239.5, "completions/mean_length": 97.50000190734863, "completions/min_length": 44.25, "epoch": 9.835443037974684, "grad_norm": 1.7293514685154632, "kl": 0.26708984375, "learning_rate": 6.765959755573591e-10, "loss": -0.01242423802614212, "memory(GiB)": 112.53, "reward": 1.8125000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000074505806, "rewards/CineAccuracyORM/std": 0.190968319773674, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6600, "train_speed(iter/s)": 0.026887 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 249.0, "completions/mean_length": 92.80208587646484, "completions/min_length": 39.75, "epoch": 9.836932241250931, "grad_norm": 0.002544527007451528, "kl": 0.26953125, "learning_rate": 6.643528601478765e-10, "loss": 0.0002694405848160386, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6601, "train_speed(iter/s)": 0.026887 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.25, "completions/mean_length": 88.10416984558105, "completions/min_length": 39.25, "epoch": 9.838421444527178, "grad_norm": 0.0021706692712832316, "kl": 0.282958984375, "learning_rate": 6.522214553850158e-10, "loss": 0.0002830250596161932, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6602, "train_speed(iter/s)": 0.026884 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 220.5, "completions/mean_length": 96.10416984558105, "completions/min_length": 41.5, "epoch": 9.839910647803425, "grad_norm": 0.0022786151333420088, "kl": 0.260498046875, "learning_rate": 6.402017639828839e-10, "loss": 0.0002606733178254217, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6603, "train_speed(iter/s)": 0.026885 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 288.75, "completions/mean_length": 100.55208587646484, "completions/min_length": 40.25, "epoch": 9.841399851079672, "grad_norm": 1.132651928308819, "kl": 0.27001953125, "learning_rate": 6.282937886303852e-10, "loss": -0.04509105533361435, "memory(GiB)": 112.53, "reward": 1.5937500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.5937500074505806, "rewards/CineAccuracyORM/std": 0.3644055500626564, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6604, "train_speed(iter/s)": 0.026886 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 216.25, "completions/mean_length": 95.08333587646484, "completions/min_length": 42.25, "epoch": 9.84288905435592, "grad_norm": 0.0026343827917746495, "kl": 0.2724609375, "learning_rate": 6.16497531991722e-10, "loss": 0.0002723459620028734, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6605, "train_speed(iter/s)": 0.026885 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 199.25, "completions/mean_length": 97.67708587646484, "completions/min_length": 48.25, "epoch": 9.844378257632167, "grad_norm": 1.0563377317547156, "kl": 0.23779296875, "learning_rate": 6.048129967057835e-10, "loss": -0.014030834659934044, "memory(GiB)": 112.53, "reward": 1.5416666865348816, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.5416666716337204, "rewards/CineAccuracyORM/std": 0.24807433784008026, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6606, "train_speed(iter/s)": 0.026885 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 261.0, "completions/mean_length": 102.33333587646484, "completions/min_length": 41.25, "epoch": 9.845867460908414, "grad_norm": 0.0022217450260954403, "kl": 0.25048828125, "learning_rate": 5.932401853867008e-10, "loss": 0.00025023389025591314, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6607, "train_speed(iter/s)": 0.026885 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 267.0, "completions/mean_length": 101.80208587646484, "completions/min_length": 42.75, "epoch": 9.84735666418466, "grad_norm": 0.002611001611988386, "kl": 0.249755859375, "learning_rate": 5.81779100623514e-10, "loss": 0.0002497399691492319, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6608, "train_speed(iter/s)": 0.026886 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 281.5, "completions/mean_length": 102.01041793823242, "completions/min_length": 44.5, "epoch": 9.84884586746091, "grad_norm": 1.1592346077055506, "kl": 0.258056640625, "learning_rate": 5.704297449802831e-10, "loss": -0.006195888854563236, "memory(GiB)": 112.53, "reward": 1.8125000298023224, "reward_std": 0.03857583925127983, "rewards/CineAccuracyORM/mean": 0.8125000149011612, "rewards/CineAccuracyORM/std": 0.24628838896751404, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6609, "train_speed(iter/s)": 0.026886 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 231.5, "completions/mean_length": 94.60416793823242, "completions/min_length": 45.0, "epoch": 9.850335070737156, "grad_norm": 0.01284300109529155, "kl": 0.27978515625, "learning_rate": 5.591921209960326e-10, "loss": 0.00027921347646042705, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6610, "train_speed(iter/s)": 0.026887 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 258.5, "completions/mean_length": 108.44791984558105, "completions/min_length": 31.5, "epoch": 9.851824274013403, "grad_norm": 0.0027222327772269167, "kl": 0.23486328125, "learning_rate": 5.480662311848627e-10, "loss": 0.00023488968145102262, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6611, "train_speed(iter/s)": 0.026884 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 249.25, "completions/mean_length": 93.67708778381348, "completions/min_length": 39.25, "epoch": 9.85331347728965, "grad_norm": 0.0027332082139364454, "kl": 0.267578125, "learning_rate": 5.370520780358378e-10, "loss": 0.00026782017084769905, "memory(GiB)": 112.53, "reward": 1.5833333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.5833333432674408, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6612, "train_speed(iter/s)": 0.026884 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 226.75, "completions/mean_length": 94.75000190734863, "completions/min_length": 42.75, "epoch": 9.854802680565896, "grad_norm": 0.0026692605762398, "kl": 0.26416015625, "learning_rate": 5.261496640130425e-10, "loss": 0.00026395588065497577, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6613, "train_speed(iter/s)": 0.026885 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 273.75, "completions/mean_length": 90.11458778381348, "completions/min_length": 39.75, "epoch": 9.856291883842145, "grad_norm": 0.002500367347166145, "kl": 0.29443359375, "learning_rate": 5.153589915554701e-10, "loss": 0.0002943254658021033, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6614, "train_speed(iter/s)": 0.026885 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.5, "completions/mean_length": 97.11458587646484, "completions/min_length": 39.75, "epoch": 9.857781087118392, "grad_norm": 0.0022508676722314057, "kl": 0.27490234375, "learning_rate": 5.046800630773007e-10, "loss": 0.0002751850988715887, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6615, "train_speed(iter/s)": 0.026885 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.5, "completions/mean_length": 91.17708587646484, "completions/min_length": 34.0, "epoch": 9.859270290394639, "grad_norm": 0.005145449115922562, "kl": 0.273193359375, "learning_rate": 4.941128809675676e-10, "loss": 0.0002727755927480757, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6616, "train_speed(iter/s)": 0.026884 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 237.5, "completions/mean_length": 99.03125381469727, "completions/min_length": 44.75, "epoch": 9.860759493670885, "grad_norm": 0.0025022890779163097, "kl": 0.2822265625, "learning_rate": 4.836574475903243e-10, "loss": 0.00028237843071110547, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6617, "train_speed(iter/s)": 0.026884 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 236.0, "completions/mean_length": 94.11458587646484, "completions/min_length": 33.0, "epoch": 9.862248696947134, "grad_norm": 0.002382746425975266, "kl": 0.266357421875, "learning_rate": 4.733137652846441e-10, "loss": 0.00026619667187333107, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6618, "train_speed(iter/s)": 0.026885 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 221.75, "completions/mean_length": 93.52083587646484, "completions/min_length": 41.5, "epoch": 9.86373790022338, "grad_norm": 0.0023138169134239816, "kl": 0.2734375, "learning_rate": 4.630818363646205e-10, "loss": 0.00027391634648665786, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6619, "train_speed(iter/s)": 0.026885 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.75, "completions/mean_length": 95.20833587646484, "completions/min_length": 46.0, "epoch": 9.865227103499628, "grad_norm": 0.0023902592771363074, "kl": 0.27978515625, "learning_rate": 4.529616631193112e-10, "loss": 0.000279980362392962, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6620, "train_speed(iter/s)": 0.026886 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 223.0, "completions/mean_length": 100.08333587646484, "completions/min_length": 49.5, "epoch": 9.866716306775874, "grad_norm": 0.0239065130713362, "kl": 0.259033203125, "learning_rate": 4.429532478128495e-10, "loss": 0.00025895057478919625, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6621, "train_speed(iter/s)": 0.026885 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 177.25, "completions/mean_length": 91.67708587646484, "completions/min_length": 34.5, "epoch": 9.868205510052123, "grad_norm": 0.0021405404595055467, "kl": 0.2880859375, "learning_rate": 4.3305659268422224e-10, "loss": 0.00028829433722421527, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6622, "train_speed(iter/s)": 0.026885 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.75, "completions/mean_length": 90.23958587646484, "completions/min_length": 41.25, "epoch": 9.86969471332837, "grad_norm": 0.003988626191542289, "kl": 0.30029296875, "learning_rate": 4.2327169994749166e-10, "loss": 0.00030018898542039096, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6623, "train_speed(iter/s)": 0.026884 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 241.0, "completions/mean_length": 97.27083587646484, "completions/min_length": 42.5, "epoch": 9.871183916604616, "grad_norm": 0.04733506266637707, "kl": 0.31298828125, "learning_rate": 4.1359857179179557e-10, "loss": 0.0003126745577901602, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6624, "train_speed(iter/s)": 0.026882 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 243.75, "completions/mean_length": 102.09375381469727, "completions/min_length": 49.5, "epoch": 9.872673119880863, "grad_norm": 0.0021931228685747334, "kl": 0.251953125, "learning_rate": 4.0403721038112513e-10, "loss": 0.0002518832334317267, "memory(GiB)": 112.53, "reward": 1.666666716337204, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.666666679084301, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6625, "train_speed(iter/s)": 0.026882 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 296.0, "completions/mean_length": 113.29166984558105, "completions/min_length": 43.5, "epoch": 9.874162323157112, "grad_norm": 0.0022364907380821018, "kl": 0.229248046875, "learning_rate": 3.9458761785460257e-10, "loss": 0.00022898025054018945, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6626, "train_speed(iter/s)": 0.026881 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 273.75, "completions/mean_length": 106.15625190734863, "completions/min_length": 44.25, "epoch": 9.875651526433359, "grad_norm": 0.002261570285839167, "kl": 0.251953125, "learning_rate": 3.852497963262036e-10, "loss": 0.00025183806428685784, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6627, "train_speed(iter/s)": 0.02688 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.75, "completions/mean_length": 93.05208587646484, "completions/min_length": 41.75, "epoch": 9.877140729709605, "grad_norm": 0.0026402223521122805, "kl": 0.2783203125, "learning_rate": 3.7602374788497927e-10, "loss": 0.0002788415877148509, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6628, "train_speed(iter/s)": 0.026881 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 213.5, "completions/mean_length": 96.91666793823242, "completions/min_length": 39.0, "epoch": 9.878629932985852, "grad_norm": 0.0024113743907704286, "kl": 0.2587890625, "learning_rate": 3.6690947459500076e-10, "loss": 0.0002587178023532033, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6629, "train_speed(iter/s)": 0.02688 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 206.25, "completions/mean_length": 94.20833396911621, "completions/min_length": 29.75, "epoch": 9.880119136262099, "grad_norm": 2.0199986654102555, "kl": 0.28759765625, "learning_rate": 3.579069784953037e-10, "loss": 0.025959422811865807, "memory(GiB)": 112.53, "reward": 1.8854166865348816, "reward_std": 0.043129097670316696, "rewards/CineAccuracyORM/mean": 0.8854166716337204, "rewards/CineAccuracyORM/std": 0.20484384894371033, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6630, "train_speed(iter/s)": 0.02688 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 227.75, "completions/mean_length": 96.31250381469727, "completions/min_length": 47.75, "epoch": 9.881608339538348, "grad_norm": 0.0026598827347827664, "kl": 0.2861328125, "learning_rate": 3.4901626159988816e-10, "loss": 0.0002864775888156146, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6631, "train_speed(iter/s)": 0.026882 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.0, "completions/mean_length": 92.00000381469727, "completions/min_length": 40.0, "epoch": 9.883097542814594, "grad_norm": 0.002351000508222291, "kl": 0.279296875, "learning_rate": 3.402373258977742e-10, "loss": 0.0002796013723127544, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6632, "train_speed(iter/s)": 0.026881 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 255.5, "completions/mean_length": 96.14583587646484, "completions/min_length": 41.25, "epoch": 9.884586746090841, "grad_norm": 0.002388921446945256, "kl": 0.26806640625, "learning_rate": 3.315701733529463e-10, "loss": 0.00026770809199661016, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6633, "train_speed(iter/s)": 0.026881 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 196.0, "completions/mean_length": 85.28125190734863, "completions/min_length": 33.5, "epoch": 9.886075949367088, "grad_norm": 0.0025285722582878323, "kl": 0.2841796875, "learning_rate": 3.2301480590452014e-10, "loss": 0.0002847150608431548, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6634, "train_speed(iter/s)": 0.026879 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.75, "completions/mean_length": 100.87500190734863, "completions/min_length": 34.75, "epoch": 9.887565152643337, "grad_norm": 0.0024047397990067526, "kl": 0.2841796875, "learning_rate": 3.1457122546635353e-10, "loss": 0.000284093024674803, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6635, "train_speed(iter/s)": 0.026879 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 306.75, "completions/mean_length": 98.3125, "completions/min_length": 34.75, "epoch": 9.889054355919583, "grad_norm": 0.002674757710865254, "kl": 0.272705078125, "learning_rate": 3.0623943392754647e-10, "loss": 0.0002724749792832881, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6636, "train_speed(iter/s)": 0.026878 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.0, "completions/mean_length": 99.19791793823242, "completions/min_length": 46.0, "epoch": 9.89054355919583, "grad_norm": 0.002401526635963411, "kl": 0.270751953125, "learning_rate": 2.9801943315194144e-10, "loss": 0.00027070497162640095, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6637, "train_speed(iter/s)": 0.026879 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 247.25, "completions/mean_length": 92.81250190734863, "completions/min_length": 42.75, "epoch": 9.892032762472077, "grad_norm": 0.01129642524261608, "kl": 0.26123046875, "learning_rate": 2.8991122497862287e-10, "loss": 0.00026141625130549073, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6638, "train_speed(iter/s)": 0.026879 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 229.25, "completions/mean_length": 95.04166984558105, "completions/min_length": 42.0, "epoch": 9.893521965748324, "grad_norm": 0.002835021316260146, "kl": 0.26171875, "learning_rate": 2.819148112215286e-10, "loss": 0.00026159657863900065, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6639, "train_speed(iter/s)": 0.026879 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 248.0, "completions/mean_length": 97.28125381469727, "completions/min_length": 40.5, "epoch": 9.895011169024572, "grad_norm": 0.0033245355205822292, "kl": 0.28125, "learning_rate": 2.74030193669561e-10, "loss": 0.00028111384017392993, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6640, "train_speed(iter/s)": 0.026878 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.5, "completions/mean_length": 95.30208778381348, "completions/min_length": 45.5, "epoch": 9.89650037230082, "grad_norm": 0.002345815391530198, "kl": 0.27294921875, "learning_rate": 2.66257374086698e-10, "loss": 0.0002723347279243171, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6641, "train_speed(iter/s)": 0.026877 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 241.5, "completions/mean_length": 98.50000190734863, "completions/min_length": 46.75, "epoch": 9.897989575577066, "grad_norm": 0.002719504757756486, "kl": 0.28662109375, "learning_rate": 2.5859635421182634e-10, "loss": 0.0002870017779059708, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6642, "train_speed(iter/s)": 0.026875 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.5, "completions/mean_length": 92.06250190734863, "completions/min_length": 37.0, "epoch": 9.899478778853313, "grad_norm": 0.002507240751382154, "kl": 0.260009765625, "learning_rate": 2.5104713575885284e-10, "loss": 0.00025995043688453734, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6643, "train_speed(iter/s)": 0.026875 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 208.5, "completions/mean_length": 94.02083587646484, "completions/min_length": 37.5, "epoch": 9.900967982129561, "grad_norm": 0.0023155330219468925, "kl": 0.261474609375, "learning_rate": 2.436097204167042e-10, "loss": 0.00026140655972994864, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6644, "train_speed(iter/s)": 0.026876 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.75, "completions/mean_length": 85.67708587646484, "completions/min_length": 39.5, "epoch": 9.902457185405808, "grad_norm": 0.002506784583490929, "kl": 0.27197265625, "learning_rate": 2.3628410984932734e-10, "loss": 0.0002717345778364688, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6645, "train_speed(iter/s)": 0.026875 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 178.5, "completions/mean_length": 83.83333587646484, "completions/min_length": 36.25, "epoch": 9.903946388682055, "grad_norm": 0.002639761432662919, "kl": 0.287109375, "learning_rate": 2.2907030569546681e-10, "loss": 0.00028688719612546265, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6646, "train_speed(iter/s)": 0.026876 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 234.0, "completions/mean_length": 99.55208587646484, "completions/min_length": 40.75, "epoch": 9.905435591958302, "grad_norm": 0.002269520663825295, "kl": 0.2578125, "learning_rate": 2.2196830956905387e-10, "loss": 0.0002581105218268931, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6647, "train_speed(iter/s)": 0.026873 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 272.0, "completions/mean_length": 103.84375190734863, "completions/min_length": 41.0, "epoch": 9.90692479523455, "grad_norm": 0.0024746825757510802, "kl": 0.25927734375, "learning_rate": 2.149781230589842e-10, "loss": 0.00025941390777006745, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6648, "train_speed(iter/s)": 0.026869 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 195.75, "completions/mean_length": 82.72917175292969, "completions/min_length": 42.25, "epoch": 9.908413998510797, "grad_norm": 0.002600219830463031, "kl": 0.2978515625, "learning_rate": 2.0809974772900697e-10, "loss": 0.00029717921279370785, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6649, "train_speed(iter/s)": 0.026871 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 249.0, "completions/mean_length": 103.07291793823242, "completions/min_length": 34.25, "epoch": 9.909903201787044, "grad_norm": 0.9470509971767944, "kl": 0.264404296875, "learning_rate": 2.0133318511800224e-10, "loss": -0.012845340184867382, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6650, "train_speed(iter/s)": 0.026871 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 244.25, "completions/mean_length": 114.98958587646484, "completions/min_length": 48.0, "epoch": 9.91139240506329, "grad_norm": 0.002099307348554486, "kl": 0.238525390625, "learning_rate": 1.9467843673970362e-10, "loss": 0.0002384097024332732, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6651, "train_speed(iter/s)": 0.02687 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 253.0, "completions/mean_length": 103.07291984558105, "completions/min_length": 43.75, "epoch": 9.91288160833954, "grad_norm": 0.0023952854508740317, "kl": 0.257080078125, "learning_rate": 1.8813550408297573e-10, "loss": 0.00025630503660067916, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6652, "train_speed(iter/s)": 0.026871 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.75, "completions/mean_length": 90.75000381469727, "completions/min_length": 45.5, "epoch": 9.914370811615786, "grad_norm": 0.0024745314926851746, "kl": 0.26953125, "learning_rate": 1.817043886115921e-10, "loss": 0.0002693000715225935, "memory(GiB)": 112.53, "reward": 1.6666666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666716337204, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6653, "train_speed(iter/s)": 0.026872 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 291.75, "completions/mean_length": 100.68750190734863, "completions/min_length": 39.0, "epoch": 9.915860014892033, "grad_norm": 0.0024929070338757945, "kl": 0.26025390625, "learning_rate": 1.753850917642352e-10, "loss": 0.0002600034640636295, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6654, "train_speed(iter/s)": 0.026872 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 236.5, "completions/mean_length": 94.39583587646484, "completions/min_length": 42.0, "epoch": 9.91734921816828, "grad_norm": 0.0025032418423957515, "kl": 0.262939453125, "learning_rate": 1.691776149547741e-10, "loss": 0.0002627918147481978, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6655, "train_speed(iter/s)": 0.026873 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 261.0, "completions/mean_length": 96.32291984558105, "completions/min_length": 37.25, "epoch": 9.918838421444526, "grad_norm": 0.0025604840028305245, "kl": 0.2978515625, "learning_rate": 1.6308195957182026e-10, "loss": 0.0002974590170197189, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6656, "train_speed(iter/s)": 0.026872 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 205.75, "completions/mean_length": 83.32291984558105, "completions/min_length": 35.25, "epoch": 9.920327624720775, "grad_norm": 0.00244939994466414, "kl": 0.298828125, "learning_rate": 1.5709812697911606e-10, "loss": 0.000299410690786317, "memory(GiB)": 112.53, "reward": 1.8333333432674408, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333358168602, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6657, "train_speed(iter/s)": 0.026873 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 193.0, "completions/mean_length": 88.32291793823242, "completions/min_length": 40.25, "epoch": 9.921816827997022, "grad_norm": 0.07982589877088128, "kl": 0.30517578125, "learning_rate": 1.512261185153685e-10, "loss": 0.00030443895957432687, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6658, "train_speed(iter/s)": 0.026874 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.0, "completions/mean_length": 84.59375190734863, "completions/min_length": 41.25, "epoch": 9.923306031273269, "grad_norm": 0.004898944148784895, "kl": 0.2978515625, "learning_rate": 1.454659354942489e-10, "loss": 0.000297385617159307, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6659, "train_speed(iter/s)": 0.026874 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.5, "completions/mean_length": 97.71875381469727, "completions/min_length": 36.5, "epoch": 9.924795234549515, "grad_norm": 0.0021024587007026454, "kl": 0.259765625, "learning_rate": 1.398175792044487e-10, "loss": 0.0002598082646727562, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6660, "train_speed(iter/s)": 0.026873 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 217.0, "completions/mean_length": 92.46875381469727, "completions/min_length": 46.5, "epoch": 9.926284437825764, "grad_norm": 0.0025881424714106244, "kl": 0.27001953125, "learning_rate": 1.3428105090956821e-10, "loss": 0.0002704602957237512, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6661, "train_speed(iter/s)": 0.026874 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 237.75, "completions/mean_length": 98.31250381469727, "completions/min_length": 43.5, "epoch": 9.92777364110201, "grad_norm": 0.0028053410144971837, "kl": 0.258056640625, "learning_rate": 1.2885635184828326e-10, "loss": 0.00025769832427613437, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6662, "train_speed(iter/s)": 0.026874 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.25, "completions/mean_length": 94.53125381469727, "completions/min_length": 37.25, "epoch": 9.929262844378258, "grad_norm": 0.0022118124639536954, "kl": 0.26025390625, "learning_rate": 1.235434832341231e-10, "loss": 0.0002600917359814048, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6663, "train_speed(iter/s)": 0.026875 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 237.75, "completions/mean_length": 93.42708587646484, "completions/min_length": 41.5, "epoch": 9.930752047654504, "grad_norm": 0.002298302935745162, "kl": 0.29296875, "learning_rate": 1.1834244625574807e-10, "loss": 0.00029232539236545563, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6664, "train_speed(iter/s)": 0.026875 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 287.75, "completions/mean_length": 88.81250381469727, "completions/min_length": 34.5, "epoch": 9.932241250930751, "grad_norm": 0.002800751952948814, "kl": 0.2978515625, "learning_rate": 1.1325324207667186e-10, "loss": 0.00029786810046061873, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6665, "train_speed(iter/s)": 0.026872 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 194.25, "completions/mean_length": 102.61458587646484, "completions/min_length": 39.5, "epoch": 9.933730454207, "grad_norm": 0.965118986553077, "kl": 0.256103515625, "learning_rate": 1.082758718354837e-10, "loss": -0.016541240736842155, "memory(GiB)": 112.53, "reward": 1.9270833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9270833432674408, "rewards/CineAccuracyORM/std": 0.11607640981674194, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6666, "train_speed(iter/s)": 0.026873 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 207.75, "completions/mean_length": 95.15625381469727, "completions/min_length": 39.0, "epoch": 9.935219657483247, "grad_norm": 0.07928621617867715, "kl": 0.305908203125, "learning_rate": 1.0341033664568177e-10, "loss": 0.00030583314946852624, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6667, "train_speed(iter/s)": 0.026873 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 248.5, "completions/mean_length": 97.18750381469727, "completions/min_length": 44.25, "epoch": 9.936708860759493, "grad_norm": 0.002236104998455933, "kl": 0.262939453125, "learning_rate": 9.865663759578424e-11, "loss": 0.0002627957728691399, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6668, "train_speed(iter/s)": 0.026872 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.5, "completions/mean_length": 94.18750381469727, "completions/min_length": 34.0, "epoch": 9.93819806403574, "grad_norm": 1.0529204304020419, "kl": 0.28271484375, "learning_rate": 9.401477574932926e-11, "loss": -0.008120370097458363, "memory(GiB)": 112.53, "reward": 1.9062500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.9062500149011612, "rewards/CineAccuracyORM/std": 0.17141688987612724, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6669, "train_speed(iter/s)": 0.026871 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 187.0, "completions/mean_length": 93.15625190734863, "completions/min_length": 41.25, "epoch": 9.939687267311989, "grad_norm": 0.0021584099213158583, "kl": 0.260498046875, "learning_rate": 8.948475214470841e-11, "loss": 0.00026085914578288794, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6670, "train_speed(iter/s)": 0.026873 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 203.75, "completions/mean_length": 89.50000381469727, "completions/min_length": 39.75, "epoch": 9.941176470588236, "grad_norm": 0.0024687887765969716, "kl": 0.28515625, "learning_rate": 8.506656779538879e-11, "loss": 0.0002856651844922453, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6671, "train_speed(iter/s)": 0.026872 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 219.25, "completions/mean_length": 99.47916793823242, "completions/min_length": 46.5, "epoch": 9.942665673864482, "grad_norm": 0.0026350944192151007, "kl": 0.268310546875, "learning_rate": 8.076022368985747e-11, "loss": 0.0002683918864931911, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6672, "train_speed(iter/s)": 0.026871 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 259.25, "completions/mean_length": 107.21875190734863, "completions/min_length": 44.25, "epoch": 9.944154877140729, "grad_norm": 0.002851252299352043, "kl": 0.2529296875, "learning_rate": 7.656572079145495e-11, "loss": 0.0002527268370613456, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6673, "train_speed(iter/s)": 0.02687 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 215.25, "completions/mean_length": 94.26041793823242, "completions/min_length": 46.5, "epoch": 9.945644080416978, "grad_norm": 0.002256531888778466, "kl": 0.2548828125, "learning_rate": 7.248306003865279e-11, "loss": 0.0002552142832428217, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6674, "train_speed(iter/s)": 0.026872 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 275.75, "completions/mean_length": 100.87500381469727, "completions/min_length": 40.75, "epoch": 9.947133283693224, "grad_norm": 0.0036963512554489823, "kl": 0.2705078125, "learning_rate": 6.851224234472042e-11, "loss": 0.000270106946118176, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6675, "train_speed(iter/s)": 0.02687 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 249.75, "completions/mean_length": 97.85416984558105, "completions/min_length": 34.0, "epoch": 9.948622486969471, "grad_norm": 0.012464046413395989, "kl": 0.29150390625, "learning_rate": 6.465326859811382e-11, "loss": 0.0002913501230068505, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6676, "train_speed(iter/s)": 0.02687 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 240.25, "completions/mean_length": 93.38541984558105, "completions/min_length": 34.5, "epoch": 9.950111690245718, "grad_norm": 0.003108876106999942, "kl": 0.288818359375, "learning_rate": 6.090613966203139e-11, "loss": 0.0002889932075049728, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6677, "train_speed(iter/s)": 0.02687 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 276.5, "completions/mean_length": 104.41666984558105, "completions/min_length": 41.0, "epoch": 9.951600893521967, "grad_norm": 0.0024426414456620803, "kl": 0.260498046875, "learning_rate": 5.727085637485807e-11, "loss": 0.000259960419498384, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6678, "train_speed(iter/s)": 0.026868 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 254.5, "completions/mean_length": 95.29166984558105, "completions/min_length": 42.0, "epoch": 9.953090096798213, "grad_norm": 0.0036656188127407634, "kl": 0.276123046875, "learning_rate": 5.37474195498322e-11, "loss": 0.00027670853887684643, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6679, "train_speed(iter/s)": 0.026867 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 266.0, "completions/mean_length": 95.25000381469727, "completions/min_length": 45.5, "epoch": 9.95457930007446, "grad_norm": 0.0024423491747288077, "kl": 0.272216796875, "learning_rate": 5.033582997526764e-11, "loss": 0.0002721587661653757, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6680, "train_speed(iter/s)": 0.026868 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 212.5, "completions/mean_length": 96.97916984558105, "completions/min_length": 42.0, "epoch": 9.956068503350707, "grad_norm": 0.002316097329648835, "kl": 0.269287109375, "learning_rate": 4.703608841438722e-11, "loss": 0.00026907213032245636, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6681, "train_speed(iter/s)": 0.026868 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 260.0, "completions/mean_length": 99.25000381469727, "completions/min_length": 46.5, "epoch": 9.957557706626954, "grad_norm": 0.0023070944769907065, "kl": 0.265625, "learning_rate": 4.3848195605322715e-11, "loss": 0.0002658866869751364, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6682, "train_speed(iter/s)": 0.026868 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 268.75, "completions/mean_length": 100.38541984558105, "completions/min_length": 44.5, "epoch": 9.959046909903202, "grad_norm": 0.0023108704038977785, "kl": 0.27197265625, "learning_rate": 4.07721522613369e-11, "loss": 0.0002718576870393008, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6683, "train_speed(iter/s)": 0.026868 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 238.25, "completions/mean_length": 103.61458587646484, "completions/min_length": 48.75, "epoch": 9.96053611317945, "grad_norm": 0.002408889006569742, "kl": 0.240966796875, "learning_rate": 3.780795907060153e-11, "loss": 0.0002408589207334444, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6684, "train_speed(iter/s)": 0.026867 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 249.5, "completions/mean_length": 94.35416984558105, "completions/min_length": 34.75, "epoch": 9.962025316455696, "grad_norm": 0.002184493321517603, "kl": 0.27392578125, "learning_rate": 3.49556166962528e-11, "loss": 0.00027371704345569015, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6685, "train_speed(iter/s)": 0.026866 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 200.0, "completions/mean_length": 94.26041793823242, "completions/min_length": 42.5, "epoch": 9.963514519731943, "grad_norm": 0.00252782671718809, "kl": 0.2607421875, "learning_rate": 3.22151257763914e-11, "loss": 0.00026055212947539985, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6686, "train_speed(iter/s)": 0.026865 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 148.75, "completions/mean_length": 86.31250190734863, "completions/min_length": 45.0, "epoch": 9.965003723008191, "grad_norm": 0.00245375659642365, "kl": 0.2646484375, "learning_rate": 2.958648692413801e-11, "loss": 0.0002644083579070866, "memory(GiB)": 112.53, "reward": 2.0, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 1.0, "rewards/CineAccuracyORM/std": 0.0, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6687, "train_speed(iter/s)": 0.026865 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 251.0, "completions/mean_length": 99.42708396911621, "completions/min_length": 38.5, "epoch": 9.966492926284438, "grad_norm": 0.0020370552810428295, "kl": 0.250732421875, "learning_rate": 2.7069700727577748e-11, "loss": 0.0002509668411221355, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6688, "train_speed(iter/s)": 0.026866 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 224.5, "completions/mean_length": 95.59375190734863, "completions/min_length": 45.25, "epoch": 9.967982129560685, "grad_norm": 0.002302190851506466, "kl": 0.27197265625, "learning_rate": 2.466476774970472e-11, "loss": 0.00027224444784224033, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6689, "train_speed(iter/s)": 0.026866 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.25, "completions/mean_length": 93.54166984558105, "completions/min_length": 45.25, "epoch": 9.969471332836932, "grad_norm": 0.002382683885464439, "kl": 0.274169921875, "learning_rate": 2.2371688528644016e-11, "loss": 0.000274061196250841, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6690, "train_speed(iter/s)": 0.026865 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 298.5, "completions/mean_length": 111.29166793823242, "completions/min_length": 50.5, "epoch": 9.970960536113179, "grad_norm": 0.002255402465642501, "kl": 0.22900390625, "learning_rate": 2.0190463577263173e-11, "loss": 0.0002290682605234906, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6691, "train_speed(iter/s)": 0.026865 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 211.0, "completions/mean_length": 95.63542175292969, "completions/min_length": 39.25, "epoch": 9.972449739389427, "grad_norm": 0.0026453362488754427, "kl": 0.2802734375, "learning_rate": 1.8121093383671738e-11, "loss": 0.0002801840892061591, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6692, "train_speed(iter/s)": 0.026866 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 208.75, "completions/mean_length": 91.70833587646484, "completions/min_length": 41.5, "epoch": 9.973938942665674, "grad_norm": 0.0025247880456851744, "kl": 0.27734375, "learning_rate": 1.61635784107772e-11, "loss": 0.00027748412685468793, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6693, "train_speed(iter/s)": 0.026867 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 202.25, "completions/mean_length": 83.08333587646484, "completions/min_length": 38.0, "epoch": 9.97542814594192, "grad_norm": 0.003942437269074942, "kl": 0.33154296875, "learning_rate": 1.4317919096507036e-11, "loss": 0.0003312950429972261, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6694, "train_speed(iter/s)": 0.026867 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 254.75, "completions/mean_length": 92.08333587646484, "completions/min_length": 41.75, "epoch": 9.976917349218168, "grad_norm": 0.005757836511285467, "kl": 0.27783203125, "learning_rate": 1.2584115853808696e-11, "loss": 0.00027840377879329026, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6695, "train_speed(iter/s)": 0.026867 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.0, "completions/mean_length": 93.53125190734863, "completions/min_length": 29.75, "epoch": 9.978406552494416, "grad_norm": 0.0037642080403062278, "kl": 0.2705078125, "learning_rate": 1.0962169070483084e-11, "loss": 0.00027001285343430936, "memory(GiB)": 112.53, "reward": 1.8333333730697632, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.8333333432674408, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6696, "train_speed(iter/s)": 0.026867 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 242.5, "completions/mean_length": 93.91666793823242, "completions/min_length": 47.5, "epoch": 9.979895755770663, "grad_norm": 0.7441456781218311, "kl": 0.282958984375, "learning_rate": 9.452079109462108e-12, "loss": 0.01852596178650856, "memory(GiB)": 112.53, "reward": 1.7395833432674408, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.7395833358168602, "rewards/CineAccuracyORM/std": 0.24401969462633133, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6697, "train_speed(iter/s)": 0.026866 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 199.25, "completions/mean_length": 97.48958587646484, "completions/min_length": 39.25, "epoch": 9.98138495904691, "grad_norm": 0.9999273332773052, "kl": 0.43212890625, "learning_rate": 8.05384630853112e-12, "loss": 0.0050782994367182255, "memory(GiB)": 112.53, "reward": 1.791666716337204, "reward_std": 0.044543541967868805, "rewards/CineAccuracyORM/mean": 0.7916666865348816, "rewards/CineAccuracyORM/std": 0.33594508469104767, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6698, "train_speed(iter/s)": 0.026866 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 257.0, "completions/mean_length": 94.00000190734863, "completions/min_length": 42.5, "epoch": 9.982874162323156, "grad_norm": 0.002255810800534783, "kl": 0.27978515625, "learning_rate": 6.767470980550971e-12, "loss": 0.0002795598120428622, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6699, "train_speed(iter/s)": 0.026864 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 252.5, "completions/mean_length": 95.45833778381348, "completions/min_length": 37.5, "epoch": 9.984363365599405, "grad_norm": 0.0025316935906313422, "kl": 0.275390625, "learning_rate": 5.592953413291468e-12, "loss": 0.0002744836965575814, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6700, "train_speed(iter/s)": 0.026865 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 218.75, "completions/mean_length": 96.07291793823242, "completions/min_length": 48.0, "epoch": 9.985852568875652, "grad_norm": 0.19321240153161082, "kl": 0.325439453125, "learning_rate": 4.53029386948689e-12, "loss": 0.0003256932250224054, "memory(GiB)": 112.53, "reward": 1.6666667461395264, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.6666666865348816, "rewards/CineAccuracyORM/std": 0.4815434217453003, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6701, "train_speed(iter/s)": 0.026865 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 191.75, "completions/mean_length": 88.75000190734863, "completions/min_length": 34.75, "epoch": 9.987341772151899, "grad_norm": 1.3056896777178024, "kl": 0.26806640625, "learning_rate": 3.579492586891497e-12, "loss": 0.008011418394744396, "memory(GiB)": 112.53, "reward": 1.8437500298023224, "reward_std": 0.02946278266608715, "rewards/CineAccuracyORM/mean": 0.8437500149011612, "rewards/CineAccuracyORM/std": 0.23646226525306702, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6702, "train_speed(iter/s)": 0.026861 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 248.25, "completions/mean_length": 96.77083587646484, "completions/min_length": 37.25, "epoch": 9.988830975428145, "grad_norm": 0.0023852287881381082, "kl": 0.25634765625, "learning_rate": 2.7405497782240216e-12, "loss": 0.00025589505094103515, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6703, "train_speed(iter/s)": 0.026863 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 260.25, "completions/mean_length": 94.96875381469727, "completions/min_length": 40.0, "epoch": 9.990320178704394, "grad_norm": 0.002359928040086912, "kl": 0.28369140625, "learning_rate": 2.0134656311676656e-12, "loss": 0.0002841699169948697, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6704, "train_speed(iter/s)": 0.026862 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 241.75, "completions/mean_length": 102.00000381469727, "completions/min_length": 43.25, "epoch": 9.99180938198064, "grad_norm": 0.0023829343850091775, "kl": 0.2529296875, "learning_rate": 1.3982403083701022e-12, "loss": 0.0002532575745135546, "memory(GiB)": 112.53, "reward": 1.7500000596046448, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000149011612, "rewards/CineAccuracyORM/std": 0.3611575663089752, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6705, "train_speed(iter/s)": 0.02686 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 265.5, "completions/mean_length": 96.81250190734863, "completions/min_length": 43.0, "epoch": 9.993298585256888, "grad_norm": 0.002321951036265371, "kl": 0.261474609375, "learning_rate": 8.948739474989864e-13, "loss": 0.0002609281218610704, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6706, "train_speed(iter/s)": 0.026861 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 198.75, "completions/mean_length": 96.60416984558105, "completions/min_length": 43.5, "epoch": 9.994787788533134, "grad_norm": 0.0021121591618203034, "kl": 0.251220703125, "learning_rate": 5.033666611864439e-13, "loss": 0.00025118174380622804, "memory(GiB)": 112.53, "reward": 1.7500000298023224, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.7500000074505806, "rewards/CineAccuracyORM/std": 0.24077171087265015, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6707, "train_speed(iter/s)": 0.02686 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 210.5, "completions/mean_length": 89.28125190734863, "completions/min_length": 40.5, "epoch": 9.996276991809381, "grad_norm": 0.0028483643138961993, "kl": 0.29443359375, "learning_rate": 2.2371853691804943e-13, "loss": 0.0002943617000710219, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6708, "train_speed(iter/s)": 0.026861 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 241.75, "completions/mean_length": 93.10416984558105, "completions/min_length": 40.0, "epoch": 9.99776619508563, "grad_norm": 0.0036972286321637115, "kl": 0.27685546875, "learning_rate": 5.59296373658924e-14, "loss": 0.00027627410599961877, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6709, "train_speed(iter/s)": 0.026861 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 199.25, "completions/mean_length": 96.11458396911621, "completions/min_length": 42.25, "epoch": 9.999255398361877, "grad_norm": 0.0021527105933500886, "kl": 0.25341796875, "learning_rate": 0.0, "loss": 0.0002536379615776241, "memory(GiB)": 112.53, "reward": 1.9166666865348816, "reward_std": 0.0, "rewards/CineAccuracyORM/mean": 0.9166666716337204, "rewards/CineAccuracyORM/std": 0.12038585543632507, "rewards/Format/mean": 1.0, "rewards/Format/std": 0.0, "step": 6710, "train_speed(iter/s)": 0.02686 } ], "logging_steps": 1, "max_steps": 6710, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }