| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.017014036580178648, |
| "eval_steps": 500, |
| "global_step": 40, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.03125, |
| "completions/max_length": 512.0, |
| "completions/max_terminated_length": 313.0, |
| "completions/mean_length": 169.875, |
| "completions/mean_terminated_length": 158.8386993408203, |
| "completions/min_length": 95.0, |
| "completions/min_terminated_length": 95.0, |
| "epoch": 0.0004253509145044662, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 4.363935947418213, |
| "kl": 0.0003229373978683725, |
| "learning_rate": 0.0, |
| "loss": 0.0073, |
| "num_tokens": 81770.0, |
| "reward": 2.789294719696045, |
| "reward_std": 0.13441388309001923, |
| "rewards/format_hoi_key_reward/mean": 0.8359375, |
| "rewards/format_hoi_key_reward/std": 0.2847406268119812, |
| "rewards/format_hoi_object_label_reward/mean": 0.7421875, |
| "rewards/format_hoi_object_label_reward/std": 0.3625374734401703, |
| "rewards/format_hoi_verb_label_reward/mean": 0.5434027910232544, |
| "rewards/format_hoi_verb_label_reward/std": 0.38580045104026794, |
| "rewards/hoi_iou_reward/mean": 0.6677669286727905, |
| "rewards/hoi_iou_reward/std": 0.27127814292907715, |
| "step": 1 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 468.0, |
| "completions/max_terminated_length": 468.0, |
| "completions/mean_length": 172.28125, |
| "completions/mean_terminated_length": 172.28125, |
| "completions/min_length": 125.0, |
| "completions/min_terminated_length": 125.0, |
| "epoch": 0.0008507018290089324, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 1.752140760421753, |
| "kl": 0.000335300101141911, |
| "learning_rate": 7.042253521126761e-08, |
| "loss": -0.0022, |
| "num_tokens": 163613.0, |
| "reward": 2.4476234912872314, |
| "reward_std": 0.17685432732105255, |
| "rewards/format_hoi_key_reward/mean": 0.7478471994400024, |
| "rewards/format_hoi_key_reward/std": 0.3051668405532837, |
| "rewards/format_hoi_object_label_reward/mean": 0.6697916984558105, |
| "rewards/format_hoi_object_label_reward/std": 0.34505072236061096, |
| "rewards/format_hoi_verb_label_reward/mean": 0.39635416865348816, |
| "rewards/format_hoi_verb_label_reward/std": 0.2153073251247406, |
| "rewards/hoi_iou_reward/mean": 0.6336303353309631, |
| "rewards/hoi_iou_reward/std": 0.3029536306858063, |
| "step": 2 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.03125, |
| "completions/max_length": 512.0, |
| "completions/max_terminated_length": 487.0, |
| "completions/mean_length": 186.03125, |
| "completions/mean_terminated_length": 175.51612854003906, |
| "completions/min_length": 91.0, |
| "completions/min_terminated_length": 91.0, |
| "epoch": 0.0012760527435133986, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 1.0201789140701294, |
| "kl": 0.0003579963668016717, |
| "learning_rate": 1.4084507042253522e-07, |
| "loss": -0.0078, |
| "num_tokens": 245910.0, |
| "reward": 2.2469096183776855, |
| "reward_std": 0.21650519967079163, |
| "rewards/format_hoi_key_reward/mean": 0.7998958826065063, |
| "rewards/format_hoi_key_reward/std": 0.2879995107650757, |
| "rewards/format_hoi_object_label_reward/mean": 0.4833333492279053, |
| "rewards/format_hoi_object_label_reward/std": 0.4311150908470154, |
| "rewards/format_hoi_verb_label_reward/mean": 0.44062501192092896, |
| "rewards/format_hoi_verb_label_reward/std": 0.39745914936065674, |
| "rewards/hoi_iou_reward/mean": 0.5230554342269897, |
| "rewards/hoi_iou_reward/std": 0.3132159411907196, |
| "step": 3 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 457.0, |
| "completions/max_terminated_length": 457.0, |
| "completions/mean_length": 186.4375, |
| "completions/mean_terminated_length": 186.4375, |
| "completions/min_length": 95.0, |
| "completions/min_terminated_length": 95.0, |
| "epoch": 0.0017014036580178648, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 1.2351148128509521, |
| "kl": 0.0003153797151753679, |
| "learning_rate": 2.1126760563380284e-07, |
| "loss": 0.0413, |
| "num_tokens": 328192.0, |
| "reward": 3.0791516304016113, |
| "reward_std": 0.1785963922739029, |
| "rewards/format_hoi_key_reward/mean": 0.8968750238418579, |
| "rewards/format_hoi_key_reward/std": 0.19876126945018768, |
| "rewards/format_hoi_object_label_reward/mean": 0.8411458730697632, |
| "rewards/format_hoi_object_label_reward/std": 0.30258694291114807, |
| "rewards/format_hoi_verb_label_reward/mean": 0.5755208730697632, |
| "rewards/format_hoi_verb_label_reward/std": 0.3113258183002472, |
| "rewards/hoi_iou_reward/mean": 0.7656100392341614, |
| "rewards/hoi_iou_reward/std": 0.240717351436615, |
| "step": 4 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.03125, |
| "completions/max_length": 512.0, |
| "completions/max_terminated_length": 336.0, |
| "completions/mean_length": 191.0, |
| "completions/mean_terminated_length": 180.64515686035156, |
| "completions/min_length": 92.0, |
| "completions/min_terminated_length": 92.0, |
| "epoch": 0.002126754572522331, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 1.201839566230774, |
| "kl": 0.00034184849937446415, |
| "learning_rate": 2.8169014084507043e-07, |
| "loss": 0.0003, |
| "num_tokens": 410578.0, |
| "reward": 2.123943328857422, |
| "reward_std": 0.16979998350143433, |
| "rewards/format_hoi_key_reward/mean": 0.6126735806465149, |
| "rewards/format_hoi_key_reward/std": 0.3427790701389313, |
| "rewards/format_hoi_object_label_reward/mean": 0.6137152910232544, |
| "rewards/format_hoi_object_label_reward/std": 0.3750321567058563, |
| "rewards/format_hoi_verb_label_reward/mean": 0.36812394857406616, |
| "rewards/format_hoi_verb_label_reward/std": 0.27230551838874817, |
| "rewards/hoi_iou_reward/mean": 0.5294303297996521, |
| "rewards/hoi_iou_reward/std": 0.3228149712085724, |
| "step": 5 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 230.0, |
| "completions/max_terminated_length": 230.0, |
| "completions/mean_length": 157.15625, |
| "completions/mean_terminated_length": 157.15625, |
| "completions/min_length": 88.0, |
| "completions/min_terminated_length": 88.0, |
| "epoch": 0.002552105487026797, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 1.502875566482544, |
| "kl": 0.00034246582072228193, |
| "learning_rate": 3.521126760563381e-07, |
| "loss": -0.0042, |
| "num_tokens": 491951.0, |
| "reward": 2.6239867210388184, |
| "reward_std": 0.3057432472705841, |
| "rewards/format_hoi_key_reward/mean": 0.8656250238418579, |
| "rewards/format_hoi_key_reward/std": 0.23455658555030823, |
| "rewards/format_hoi_object_label_reward/mean": 0.6927083730697632, |
| "rewards/format_hoi_object_label_reward/std": 0.38824430108070374, |
| "rewards/format_hoi_verb_label_reward/mean": 0.3932291567325592, |
| "rewards/format_hoi_verb_label_reward/std": 0.3944879472255707, |
| "rewards/hoi_iou_reward/mean": 0.6724243760108948, |
| "rewards/hoi_iou_reward/std": 0.2741568088531494, |
| "step": 6 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0625, |
| "completions/max_length": 512.0, |
| "completions/max_terminated_length": 310.0, |
| "completions/mean_length": 179.71875, |
| "completions/mean_terminated_length": 157.56668090820312, |
| "completions/min_length": 119.0, |
| "completions/min_terminated_length": 119.0, |
| "epoch": 0.0029774564015312634, |
| "frac_reward_zero_std": 0.0625, |
| "grad_norm": 1.2773802280426025, |
| "kl": 0.00039098386332625523, |
| "learning_rate": 4.225352112676057e-07, |
| "loss": 0.021, |
| "num_tokens": 574042.0, |
| "reward": 2.446112632751465, |
| "reward_std": 0.2966364324092865, |
| "rewards/format_hoi_key_reward/mean": 0.8067708015441895, |
| "rewards/format_hoi_key_reward/std": 0.31369173526763916, |
| "rewards/format_hoi_object_label_reward/mean": 0.6536458730697632, |
| "rewards/format_hoi_object_label_reward/std": 0.4338444769382477, |
| "rewards/format_hoi_verb_label_reward/mean": 0.3489583432674408, |
| "rewards/format_hoi_verb_label_reward/std": 0.3668850064277649, |
| "rewards/hoi_iou_reward/mean": 0.6367375254631042, |
| "rewards/hoi_iou_reward/std": 0.2808148264884949, |
| "step": 7 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0625, |
| "completions/max_length": 512.0, |
| "completions/max_terminated_length": 420.0, |
| "completions/mean_length": 241.1875, |
| "completions/mean_terminated_length": 223.1333465576172, |
| "completions/min_length": 125.0, |
| "completions/min_terminated_length": 125.0, |
| "epoch": 0.0034028073160357296, |
| "frac_reward_zero_std": 0.0625, |
| "grad_norm": 1.0549662113189697, |
| "kl": 0.0003542311387718655, |
| "learning_rate": 4.929577464788733e-07, |
| "loss": -0.027, |
| "num_tokens": 658152.0, |
| "reward": 2.2133703231811523, |
| "reward_std": 0.4084068238735199, |
| "rewards/format_hoi_key_reward/mean": 0.6832291483879089, |
| "rewards/format_hoi_key_reward/std": 0.29706627130508423, |
| "rewards/format_hoi_object_label_reward/mean": 0.5901042222976685, |
| "rewards/format_hoi_object_label_reward/std": 0.34433093667030334, |
| "rewards/format_hoi_verb_label_reward/mean": 0.4224702715873718, |
| "rewards/format_hoi_verb_label_reward/std": 0.32381853461265564, |
| "rewards/hoi_iou_reward/mean": 0.5175668001174927, |
| "rewards/hoi_iou_reward/std": 0.26935523748397827, |
| "step": 8 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0625, |
| "completions/max_length": 512.0, |
| "completions/max_terminated_length": 339.0, |
| "completions/mean_length": 196.46875, |
| "completions/mean_terminated_length": 175.433349609375, |
| "completions/min_length": 93.0, |
| "completions/min_terminated_length": 93.0, |
| "epoch": 0.0038281582305401958, |
| "frac_reward_zero_std": 0.0625, |
| "grad_norm": 1.110564112663269, |
| "kl": 0.0004391309848870151, |
| "learning_rate": 5.633802816901409e-07, |
| "loss": -0.0017, |
| "num_tokens": 740859.0, |
| "reward": 2.465003728866577, |
| "reward_std": 0.0937172994017601, |
| "rewards/format_hoi_key_reward/mean": 0.7511160373687744, |
| "rewards/format_hoi_key_reward/std": 0.32644686102867126, |
| "rewards/format_hoi_object_label_reward/mean": 0.6443452835083008, |
| "rewards/format_hoi_object_label_reward/std": 0.4073486030101776, |
| "rewards/format_hoi_verb_label_reward/mean": 0.4927455186843872, |
| "rewards/format_hoi_verb_label_reward/std": 0.3828698992729187, |
| "rewards/hoi_iou_reward/mean": 0.5767968893051147, |
| "rewards/hoi_iou_reward/std": 0.30675631761550903, |
| "step": 9 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 347.0, |
| "completions/max_terminated_length": 347.0, |
| "completions/mean_length": 172.09375, |
| "completions/mean_terminated_length": 172.09375, |
| "completions/min_length": 93.0, |
| "completions/min_terminated_length": 93.0, |
| "epoch": 0.004253509145044662, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 2.442121982574463, |
| "kl": 0.00042213514825562015, |
| "learning_rate": 6.338028169014085e-07, |
| "loss": -0.0037, |
| "num_tokens": 822690.0, |
| "reward": 2.751133441925049, |
| "reward_std": 0.33584073185920715, |
| "rewards/format_hoi_key_reward/mean": 0.8541666269302368, |
| "rewards/format_hoi_key_reward/std": 0.21394065022468567, |
| "rewards/format_hoi_object_label_reward/mean": 0.7239583730697632, |
| "rewards/format_hoi_object_label_reward/std": 0.3409331738948822, |
| "rewards/format_hoi_verb_label_reward/mean": 0.5394965410232544, |
| "rewards/format_hoi_verb_label_reward/std": 0.3380621373653412, |
| "rewards/hoi_iou_reward/mean": 0.6335119009017944, |
| "rewards/hoi_iou_reward/std": 0.2383546382188797, |
| "step": 10 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 254.0, |
| "completions/max_terminated_length": 254.0, |
| "completions/mean_length": 144.0625, |
| "completions/mean_terminated_length": 144.0625, |
| "completions/min_length": 92.0, |
| "completions/min_terminated_length": 92.0, |
| "epoch": 0.004678860059549128, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 1.577789545059204, |
| "kl": 0.0003447915078140795, |
| "learning_rate": 7.042253521126762e-07, |
| "loss": 0.0014, |
| "num_tokens": 903614.0, |
| "reward": 2.294086456298828, |
| "reward_std": 0.05743589997291565, |
| "rewards/format_hoi_key_reward/mean": 0.7256410121917725, |
| "rewards/format_hoi_key_reward/std": 0.3094317615032196, |
| "rewards/format_hoi_object_label_reward/mean": 0.6041666865348816, |
| "rewards/format_hoi_object_label_reward/std": 0.39372313022613525, |
| "rewards/format_hoi_verb_label_reward/mean": 0.3736979365348816, |
| "rewards/format_hoi_verb_label_reward/std": 0.3025956153869629, |
| "rewards/hoi_iou_reward/mean": 0.5905807614326477, |
| "rewards/hoi_iou_reward/std": 0.29480627179145813, |
| "step": 11 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.09375, |
| "completions/max_length": 512.0, |
| "completions/max_terminated_length": 441.0, |
| "completions/mean_length": 208.03125, |
| "completions/mean_terminated_length": 176.58621215820312, |
| "completions/min_length": 97.0, |
| "completions/min_terminated_length": 97.0, |
| "epoch": 0.005104210974053594, |
| "frac_reward_zero_std": 0.0625, |
| "grad_norm": 0.8127485513687134, |
| "kl": 0.0003554098366294056, |
| "learning_rate": 7.746478873239437e-07, |
| "loss": -0.0117, |
| "num_tokens": 986589.0, |
| "reward": 2.294464349746704, |
| "reward_std": 0.20235002040863037, |
| "rewards/format_hoi_key_reward/mean": 0.7207965850830078, |
| "rewards/format_hoi_key_reward/std": 0.35986757278442383, |
| "rewards/format_hoi_object_label_reward/mean": 0.5450674295425415, |
| "rewards/format_hoi_object_label_reward/std": 0.42823418974876404, |
| "rewards/format_hoi_verb_label_reward/mean": 0.44454658031463623, |
| "rewards/format_hoi_verb_label_reward/std": 0.4109439253807068, |
| "rewards/hoi_iou_reward/mean": 0.5840538144111633, |
| "rewards/hoi_iou_reward/std": 0.35957613587379456, |
| "step": 12 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 299.0, |
| "completions/max_terminated_length": 299.0, |
| "completions/mean_length": 172.75, |
| "completions/mean_terminated_length": 172.75, |
| "completions/min_length": 116.0, |
| "completions/min_terminated_length": 116.0, |
| "epoch": 0.00552956188855806, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 1.269765019416809, |
| "kl": 0.00036870952317258343, |
| "learning_rate": 8.450704225352114e-07, |
| "loss": -0.0155, |
| "num_tokens": 1068533.0, |
| "reward": 2.4297869205474854, |
| "reward_std": 0.2929306924343109, |
| "rewards/format_hoi_key_reward/mean": 0.796875, |
| "rewards/format_hoi_key_reward/std": 0.2455495446920395, |
| "rewards/format_hoi_object_label_reward/mean": 0.5703125, |
| "rewards/format_hoi_object_label_reward/std": 0.4022279679775238, |
| "rewards/format_hoi_verb_label_reward/mean": 0.4661458432674408, |
| "rewards/format_hoi_verb_label_reward/std": 0.33458054065704346, |
| "rewards/hoi_iou_reward/mean": 0.5964536070823669, |
| "rewards/hoi_iou_reward/std": 0.2549525201320648, |
| "step": 13 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.15625, |
| "completions/max_length": 512.0, |
| "completions/max_terminated_length": 441.0, |
| "completions/mean_length": 233.03125, |
| "completions/mean_terminated_length": 181.37037658691406, |
| "completions/min_length": 92.0, |
| "completions/min_terminated_length": 92.0, |
| "epoch": 0.005954912803062527, |
| "frac_reward_zero_std": 0.125, |
| "grad_norm": 0.9530596733093262, |
| "kl": 0.00036477607500273734, |
| "learning_rate": 9.154929577464789e-07, |
| "loss": 0.0057, |
| "num_tokens": 1152368.0, |
| "reward": 2.0490849018096924, |
| "reward_std": 0.3710783123970032, |
| "rewards/format_hoi_key_reward/mean": 0.6766666173934937, |
| "rewards/format_hoi_key_reward/std": 0.3744644820690155, |
| "rewards/format_hoi_object_label_reward/mean": 0.5354166626930237, |
| "rewards/format_hoi_object_label_reward/std": 0.4118267297744751, |
| "rewards/format_hoi_verb_label_reward/mean": 0.30980902910232544, |
| "rewards/format_hoi_verb_label_reward/std": 0.3180212080478668, |
| "rewards/hoi_iou_reward/mean": 0.5271925330162048, |
| "rewards/hoi_iou_reward/std": 0.33255496621131897, |
| "step": 14 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.03125, |
| "completions/max_length": 512.0, |
| "completions/max_terminated_length": 332.0, |
| "completions/mean_length": 183.96875, |
| "completions/mean_terminated_length": 173.3870849609375, |
| "completions/min_length": 93.0, |
| "completions/min_terminated_length": 93.0, |
| "epoch": 0.0063802637175669925, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 1.3094820976257324, |
| "kl": 0.0003903634860762395, |
| "learning_rate": 9.859154929577465e-07, |
| "loss": -0.0348, |
| "num_tokens": 1234625.0, |
| "reward": 2.416910171508789, |
| "reward_std": 0.39547234773635864, |
| "rewards/format_hoi_key_reward/mean": 0.8177083730697632, |
| "rewards/format_hoi_key_reward/std": 0.25537019968032837, |
| "rewards/format_hoi_object_label_reward/mean": 0.703125, |
| "rewards/format_hoi_object_label_reward/std": 0.3103194534778595, |
| "rewards/format_hoi_verb_label_reward/mean": 0.3838541507720947, |
| "rewards/format_hoi_verb_label_reward/std": 0.3097499907016754, |
| "rewards/hoi_iou_reward/mean": 0.5122226476669312, |
| "rewards/hoi_iou_reward/std": 0.2652962803840637, |
| "step": 15 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.03125, |
| "completions/max_length": 512.0, |
| "completions/max_terminated_length": 510.0, |
| "completions/mean_length": 191.9375, |
| "completions/mean_terminated_length": 181.61289978027344, |
| "completions/min_length": 111.0, |
| "completions/min_terminated_length": 111.0, |
| "epoch": 0.006805614632071459, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 1.2858763933181763, |
| "kl": 0.00034383483580313623, |
| "learning_rate": 1.0563380281690142e-06, |
| "loss": 0.0097, |
| "num_tokens": 1317153.0, |
| "reward": 2.2493948936462402, |
| "reward_std": 0.25483453273773193, |
| "rewards/format_hoi_key_reward/mean": 0.7226041555404663, |
| "rewards/format_hoi_key_reward/std": 0.266787588596344, |
| "rewards/format_hoi_object_label_reward/mean": 0.5197916626930237, |
| "rewards/format_hoi_object_label_reward/std": 0.395753413438797, |
| "rewards/format_hoi_verb_label_reward/mean": 0.4555555582046509, |
| "rewards/format_hoi_verb_label_reward/std": 0.34589242935180664, |
| "rewards/hoi_iou_reward/mean": 0.5514433979988098, |
| "rewards/hoi_iou_reward/std": 0.31066155433654785, |
| "step": 16 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 273.0, |
| "completions/max_terminated_length": 273.0, |
| "completions/mean_length": 143.3125, |
| "completions/mean_terminated_length": 143.3125, |
| "completions/min_length": 94.0, |
| "completions/min_terminated_length": 94.0, |
| "epoch": 0.007230965546575925, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 1.596779704093933, |
| "kl": 0.00033400646498193964, |
| "learning_rate": 1.1267605633802817e-06, |
| "loss": 0.0058, |
| "num_tokens": 1398085.0, |
| "reward": 2.553602695465088, |
| "reward_std": 0.11417173594236374, |
| "rewards/format_hoi_key_reward/mean": 0.8300297856330872, |
| "rewards/format_hoi_key_reward/std": 0.3036980926990509, |
| "rewards/format_hoi_object_label_reward/mean": 0.710565447807312, |
| "rewards/format_hoi_object_label_reward/std": 0.3981569707393646, |
| "rewards/format_hoi_verb_label_reward/mean": 0.3980654776096344, |
| "rewards/format_hoi_verb_label_reward/std": 0.3377469480037689, |
| "rewards/hoi_iou_reward/mean": 0.6149418354034424, |
| "rewards/hoi_iou_reward/std": 0.28151780366897583, |
| "step": 17 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.03125, |
| "completions/max_length": 512.0, |
| "completions/max_terminated_length": 202.0, |
| "completions/mean_length": 150.15625, |
| "completions/mean_terminated_length": 138.48387145996094, |
| "completions/min_length": 90.0, |
| "completions/min_terminated_length": 90.0, |
| "epoch": 0.0076563164610803916, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 2.1925482749938965, |
| "kl": 0.0003761410553124733, |
| "learning_rate": 1.1971830985915492e-06, |
| "loss": 0.0275, |
| "num_tokens": 1479104.0, |
| "reward": 2.4335920810699463, |
| "reward_std": 0.24886168539524078, |
| "rewards/format_hoi_key_reward/mean": 0.7729166746139526, |
| "rewards/format_hoi_key_reward/std": 0.3151136040687561, |
| "rewards/format_hoi_object_label_reward/mean": 0.6536458134651184, |
| "rewards/format_hoi_object_label_reward/std": 0.4061078131198883, |
| "rewards/format_hoi_verb_label_reward/mean": 0.4140625, |
| "rewards/format_hoi_verb_label_reward/std": 0.34423333406448364, |
| "rewards/hoi_iou_reward/mean": 0.59296715259552, |
| "rewards/hoi_iou_reward/std": 0.3096284866333008, |
| "step": 18 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0625, |
| "completions/max_length": 512.0, |
| "completions/max_terminated_length": 447.0, |
| "completions/mean_length": 217.34375, |
| "completions/mean_terminated_length": 197.70001220703125, |
| "completions/min_length": 118.0, |
| "completions/min_terminated_length": 118.0, |
| "epoch": 0.008081667375584857, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 2.192422389984131, |
| "kl": 0.0004282266672817059, |
| "learning_rate": 1.267605633802817e-06, |
| "loss": 0.0079, |
| "num_tokens": 1562405.0, |
| "reward": 2.197598457336426, |
| "reward_std": 0.26950523257255554, |
| "rewards/format_hoi_key_reward/mean": 0.6578124761581421, |
| "rewards/format_hoi_key_reward/std": 0.3557007312774658, |
| "rewards/format_hoi_object_label_reward/mean": 0.59375, |
| "rewards/format_hoi_object_label_reward/std": 0.3958510160446167, |
| "rewards/format_hoi_verb_label_reward/mean": 0.41914063692092896, |
| "rewards/format_hoi_verb_label_reward/std": 0.334130197763443, |
| "rewards/hoi_iou_reward/mean": 0.52689528465271, |
| "rewards/hoi_iou_reward/std": 0.31697896122932434, |
| "step": 19 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.03125, |
| "completions/max_length": 512.0, |
| "completions/max_terminated_length": 456.0, |
| "completions/mean_length": 167.5625, |
| "completions/mean_terminated_length": 156.4516143798828, |
| "completions/min_length": 91.0, |
| "completions/min_terminated_length": 91.0, |
| "epoch": 0.008507018290089324, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 9.545456886291504, |
| "kl": 0.0003430663564358838, |
| "learning_rate": 1.3380281690140844e-06, |
| "loss": 0.0305, |
| "num_tokens": 1644081.0, |
| "reward": 2.449803590774536, |
| "reward_std": 0.3938855230808258, |
| "rewards/format_hoi_key_reward/mean": 0.7368229031562805, |
| "rewards/format_hoi_key_reward/std": 0.29059892892837524, |
| "rewards/format_hoi_object_label_reward/mean": 0.676562488079071, |
| "rewards/format_hoi_object_label_reward/std": 0.3538031578063965, |
| "rewards/format_hoi_verb_label_reward/mean": 0.4554687738418579, |
| "rewards/format_hoi_verb_label_reward/std": 0.31711632013320923, |
| "rewards/hoi_iou_reward/mean": 0.5809494853019714, |
| "rewards/hoi_iou_reward/std": 0.25384804606437683, |
| "step": 20 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 236.0, |
| "completions/max_terminated_length": 236.0, |
| "completions/mean_length": 153.0625, |
| "completions/mean_terminated_length": 153.0625, |
| "completions/min_length": 104.0, |
| "completions/min_terminated_length": 104.0, |
| "epoch": 0.00893236920459379, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 1.9242279529571533, |
| "kl": 0.00033866508601931855, |
| "learning_rate": 1.4084507042253523e-06, |
| "loss": -0.0036, |
| "num_tokens": 1725353.0, |
| "reward": 2.697577476501465, |
| "reward_std": 0.22112613916397095, |
| "rewards/format_hoi_key_reward/mean": 0.8525000214576721, |
| "rewards/format_hoi_key_reward/std": 0.22024911642074585, |
| "rewards/format_hoi_object_label_reward/mean": 0.6875, |
| "rewards/format_hoi_object_label_reward/std": 0.3544646203517914, |
| "rewards/format_hoi_verb_label_reward/mean": 0.5083333253860474, |
| "rewards/format_hoi_verb_label_reward/std": 0.3730144500732422, |
| "rewards/hoi_iou_reward/mean": 0.6492440104484558, |
| "rewards/hoi_iou_reward/std": 0.2220328450202942, |
| "step": 21 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 466.0, |
| "completions/max_terminated_length": 466.0, |
| "completions/mean_length": 199.53125, |
| "completions/mean_terminated_length": 199.53125, |
| "completions/min_length": 117.0, |
| "completions/min_terminated_length": 117.0, |
| "epoch": 0.009357720119098255, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 2.485565662384033, |
| "kl": 0.0004189620740362443, |
| "learning_rate": 1.4788732394366198e-06, |
| "loss": -0.0042, |
| "num_tokens": 1808072.0, |
| "reward": 2.559481143951416, |
| "reward_std": 0.10840541124343872, |
| "rewards/format_hoi_key_reward/mean": 0.8031938076019287, |
| "rewards/format_hoi_key_reward/std": 0.27561086416244507, |
| "rewards/format_hoi_object_label_reward/mean": 0.6789750456809998, |
| "rewards/format_hoi_object_label_reward/std": 0.39399468898773193, |
| "rewards/format_hoi_verb_label_reward/mean": 0.45625001192092896, |
| "rewards/format_hoi_verb_label_reward/std": 0.32801535725593567, |
| "rewards/hoi_iou_reward/mean": 0.6210623979568481, |
| "rewards/hoi_iou_reward/std": 0.300423264503479, |
| "step": 22 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 456.0, |
| "completions/max_terminated_length": 456.0, |
| "completions/mean_length": 190.40625, |
| "completions/mean_terminated_length": 190.40625, |
| "completions/min_length": 114.0, |
| "completions/min_terminated_length": 114.0, |
| "epoch": 0.009783071033602722, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 1.3170195817947388, |
| "kl": 0.000440634525148198, |
| "learning_rate": 1.5492957746478873e-06, |
| "loss": 0.0079, |
| "num_tokens": 1890443.0, |
| "reward": 2.659435510635376, |
| "reward_std": 0.15746591985225677, |
| "rewards/format_hoi_key_reward/mean": 0.8373958468437195, |
| "rewards/format_hoi_key_reward/std": 0.26145681738853455, |
| "rewards/format_hoi_object_label_reward/mean": 0.6739583611488342, |
| "rewards/format_hoi_object_label_reward/std": 0.437386691570282, |
| "rewards/format_hoi_verb_label_reward/mean": 0.504687488079071, |
| "rewards/format_hoi_verb_label_reward/std": 0.35861727595329285, |
| "rewards/hoi_iou_reward/mean": 0.6433938145637512, |
| "rewards/hoi_iou_reward/std": 0.31976941227912903, |
| "step": 23 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 280.0, |
| "completions/max_terminated_length": 280.0, |
| "completions/mean_length": 157.5, |
| "completions/mean_terminated_length": 157.5, |
| "completions/min_length": 108.0, |
| "completions/min_terminated_length": 108.0, |
| "epoch": 0.010208421948107189, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 1.0314154624938965, |
| "kl": 0.00044616637751460075, |
| "learning_rate": 1.6197183098591552e-06, |
| "loss": -0.0025, |
| "num_tokens": 1971841.0, |
| "reward": 2.6582393646240234, |
| "reward_std": 0.18655748665332794, |
| "rewards/format_hoi_key_reward/mean": 0.84375, |
| "rewards/format_hoi_key_reward/std": 0.25374436378479004, |
| "rewards/format_hoi_object_label_reward/mean": 0.6328125, |
| "rewards/format_hoi_object_label_reward/std": 0.4136229455471039, |
| "rewards/format_hoi_verb_label_reward/mean": 0.4991319477558136, |
| "rewards/format_hoi_verb_label_reward/std": 0.35674116015434265, |
| "rewards/hoi_iou_reward/mean": 0.6825448274612427, |
| "rewards/hoi_iou_reward/std": 0.2595166563987732, |
| "step": 24 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0625, |
| "completions/max_length": 512.0, |
| "completions/max_terminated_length": 339.0, |
| "completions/mean_length": 180.28125, |
| "completions/mean_terminated_length": 158.1666717529297, |
| "completions/min_length": 116.0, |
| "completions/min_terminated_length": 116.0, |
| "epoch": 0.010633772862611655, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 11.281386375427246, |
| "kl": 0.0005614817346213385, |
| "learning_rate": 1.6901408450704227e-06, |
| "loss": 0.0657, |
| "num_tokens": 2053992.0, |
| "reward": 2.5150856971740723, |
| "reward_std": 0.24478863179683685, |
| "rewards/format_hoi_key_reward/mean": 0.809374988079071, |
| "rewards/format_hoi_key_reward/std": 0.3469045162200928, |
| "rewards/format_hoi_object_label_reward/mean": 0.70703125, |
| "rewards/format_hoi_object_label_reward/std": 0.4348819851875305, |
| "rewards/format_hoi_verb_label_reward/mean": 0.38359373807907104, |
| "rewards/format_hoi_verb_label_reward/std": 0.3953370153903961, |
| "rewards/hoi_iou_reward/mean": 0.6150857210159302, |
| "rewards/hoi_iou_reward/std": 0.3287210464477539, |
| "step": 25 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 348.0, |
| "completions/max_terminated_length": 348.0, |
| "completions/mean_length": 151.59375, |
| "completions/mean_terminated_length": 151.59375, |
| "completions/min_length": 96.0, |
| "completions/min_terminated_length": 96.0, |
| "epoch": 0.01105912377711612, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 1.4701417684555054, |
| "kl": 0.0005035337235312909, |
| "learning_rate": 1.7605633802816902e-06, |
| "loss": 0.0044, |
| "num_tokens": 2135251.0, |
| "reward": 2.886725902557373, |
| "reward_std": 0.2123258411884308, |
| "rewards/format_hoi_key_reward/mean": 0.8578125238418579, |
| "rewards/format_hoi_key_reward/std": 0.2075624167919159, |
| "rewards/format_hoi_object_label_reward/mean": 0.7526041865348816, |
| "rewards/format_hoi_object_label_reward/std": 0.3613770306110382, |
| "rewards/format_hoi_verb_label_reward/mean": 0.5438988208770752, |
| "rewards/format_hoi_verb_label_reward/std": 0.3702835440635681, |
| "rewards/hoi_iou_reward/mean": 0.7324104309082031, |
| "rewards/hoi_iou_reward/std": 0.2421286553144455, |
| "step": 26 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.03125, |
| "completions/max_length": 512.0, |
| "completions/max_terminated_length": 334.0, |
| "completions/mean_length": 167.8125, |
| "completions/mean_terminated_length": 156.7096710205078, |
| "completions/min_length": 93.0, |
| "completions/min_terminated_length": 93.0, |
| "epoch": 0.011484474691620587, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 1.1934653520584106, |
| "kl": 0.0005954405351076275, |
| "learning_rate": 1.8309859154929579e-06, |
| "loss": -0.032, |
| "num_tokens": 2216967.0, |
| "reward": 2.394422769546509, |
| "reward_std": 0.42784780263900757, |
| "rewards/format_hoi_key_reward/mean": 0.761805534362793, |
| "rewards/format_hoi_key_reward/std": 0.296843022108078, |
| "rewards/format_hoi_object_label_reward/mean": 0.6618055701255798, |
| "rewards/format_hoi_object_label_reward/std": 0.4076228737831116, |
| "rewards/format_hoi_verb_label_reward/mean": 0.3853406012058258, |
| "rewards/format_hoi_verb_label_reward/std": 0.3449983298778534, |
| "rewards/hoi_iou_reward/mean": 0.5854711532592773, |
| "rewards/hoi_iou_reward/std": 0.2725910246372223, |
| "step": 27 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.03125, |
| "completions/max_length": 512.0, |
| "completions/max_terminated_length": 473.0, |
| "completions/mean_length": 178.34375, |
| "completions/mean_terminated_length": 167.5806427001953, |
| "completions/min_length": 116.0, |
| "completions/min_terminated_length": 116.0, |
| "epoch": 0.011909825606125054, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 1.208022952079773, |
| "kl": 0.0006310369935818017, |
| "learning_rate": 1.9014084507042254e-06, |
| "loss": -0.034, |
| "num_tokens": 2299060.0, |
| "reward": 2.214604377746582, |
| "reward_std": 0.31437236070632935, |
| "rewards/format_hoi_key_reward/mean": 0.7563762664794922, |
| "rewards/format_hoi_key_reward/std": 0.346075177192688, |
| "rewards/format_hoi_object_label_reward/mean": 0.4952651560306549, |
| "rewards/format_hoi_object_label_reward/std": 0.47521334886550903, |
| "rewards/format_hoi_verb_label_reward/mean": 0.40388256311416626, |
| "rewards/format_hoi_verb_label_reward/std": 0.3852638006210327, |
| "rewards/hoi_iou_reward/mean": 0.5590803623199463, |
| "rewards/hoi_iou_reward/std": 0.3093617558479309, |
| "step": 28 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 450.0, |
| "completions/max_terminated_length": 450.0, |
| "completions/mean_length": 174.78125, |
| "completions/mean_terminated_length": 174.78125, |
| "completions/min_length": 96.0, |
| "completions/min_terminated_length": 96.0, |
| "epoch": 0.01233517652062952, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 1.393183946609497, |
| "kl": 0.0007882892386987805, |
| "learning_rate": 1.971830985915493e-06, |
| "loss": -0.024, |
| "num_tokens": 2380987.0, |
| "reward": 2.2392172813415527, |
| "reward_std": 0.3153127431869507, |
| "rewards/format_hoi_key_reward/mean": 0.7479861378669739, |
| "rewards/format_hoi_key_reward/std": 0.3090425729751587, |
| "rewards/format_hoi_object_label_reward/mean": 0.5729166865348816, |
| "rewards/format_hoi_object_label_reward/std": 0.4102790057659149, |
| "rewards/format_hoi_verb_label_reward/mean": 0.3237847089767456, |
| "rewards/format_hoi_verb_label_reward/std": 0.2613273561000824, |
| "rewards/hoi_iou_reward/mean": 0.5945297479629517, |
| "rewards/hoi_iou_reward/std": 0.3360900580883026, |
| "step": 29 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 311.0, |
| "completions/max_terminated_length": 311.0, |
| "completions/mean_length": 157.1875, |
| "completions/mean_terminated_length": 157.1875, |
| "completions/min_length": 95.0, |
| "completions/min_terminated_length": 95.0, |
| "epoch": 0.012760527435133985, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 1.3999539613723755, |
| "kl": 0.0008022689580684528, |
| "learning_rate": 2.0422535211267608e-06, |
| "loss": -0.0204, |
| "num_tokens": 2462487.0, |
| "reward": 2.1738319396972656, |
| "reward_std": 0.19202688336372375, |
| "rewards/format_hoi_key_reward/mean": 0.7598214149475098, |
| "rewards/format_hoi_key_reward/std": 0.29326102137565613, |
| "rewards/format_hoi_object_label_reward/mean": 0.5598958730697632, |
| "rewards/format_hoi_object_label_reward/std": 0.41382598876953125, |
| "rewards/format_hoi_verb_label_reward/mean": 0.3598484992980957, |
| "rewards/format_hoi_verb_label_reward/std": 0.3271215260028839, |
| "rewards/hoi_iou_reward/mean": 0.49426594376564026, |
| "rewards/hoi_iou_reward/std": 0.2569417953491211, |
| "step": 30 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.03125, |
| "completions/max_length": 512.0, |
| "completions/max_terminated_length": 492.0, |
| "completions/mean_length": 213.78125, |
| "completions/mean_terminated_length": 204.16128540039062, |
| "completions/min_length": 114.0, |
| "completions/min_terminated_length": 114.0, |
| "epoch": 0.013185878349638452, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 5.476845741271973, |
| "kl": 0.000950784218730405, |
| "learning_rate": 2.1126760563380285e-06, |
| "loss": 0.004, |
| "num_tokens": 2545612.0, |
| "reward": 2.5632033348083496, |
| "reward_std": 0.5354008078575134, |
| "rewards/format_hoi_key_reward/mean": 0.7911458015441895, |
| "rewards/format_hoi_key_reward/std": 0.23706629872322083, |
| "rewards/format_hoi_object_label_reward/mean": 0.7135416865348816, |
| "rewards/format_hoi_object_label_reward/std": 0.32581403851509094, |
| "rewards/format_hoi_verb_label_reward/mean": 0.4348958432674408, |
| "rewards/format_hoi_verb_label_reward/std": 0.3128693997859955, |
| "rewards/hoi_iou_reward/mean": 0.6236201524734497, |
| "rewards/hoi_iou_reward/std": 0.2516784965991974, |
| "step": 31 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.03125, |
| "completions/max_length": 512.0, |
| "completions/max_terminated_length": 274.0, |
| "completions/mean_length": 174.40625, |
| "completions/mean_terminated_length": 163.51612854003906, |
| "completions/min_length": 113.0, |
| "completions/min_terminated_length": 113.0, |
| "epoch": 0.013611229264142918, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 2.7625715732574463, |
| "kl": 0.001197469318867661, |
| "learning_rate": 2.1830985915492958e-06, |
| "loss": -0.0273, |
| "num_tokens": 2627593.0, |
| "reward": 2.5554096698760986, |
| "reward_std": 0.28477969765663147, |
| "rewards/format_hoi_key_reward/mean": 0.784375011920929, |
| "rewards/format_hoi_key_reward/std": 0.2782413065433502, |
| "rewards/format_hoi_object_label_reward/mean": 0.65625, |
| "rewards/format_hoi_object_label_reward/std": 0.38553017377853394, |
| "rewards/format_hoi_verb_label_reward/mean": 0.49926215410232544, |
| "rewards/format_hoi_verb_label_reward/std": 0.38366368412971497, |
| "rewards/hoi_iou_reward/mean": 0.6155223846435547, |
| "rewards/hoi_iou_reward/std": 0.3186318576335907, |
| "step": 32 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 333.0, |
| "completions/max_terminated_length": 333.0, |
| "completions/mean_length": 167.9375, |
| "completions/mean_terminated_length": 167.9375, |
| "completions/min_length": 109.0, |
| "completions/min_terminated_length": 109.0, |
| "epoch": 0.014036580178647383, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 2.134929656982422, |
| "kl": 0.0012024562747683376, |
| "learning_rate": 2.2535211267605635e-06, |
| "loss": -0.0295, |
| "num_tokens": 2709249.0, |
| "reward": 2.4919838905334473, |
| "reward_std": 0.2809957265853882, |
| "rewards/format_hoi_key_reward/mean": 0.8414583206176758, |
| "rewards/format_hoi_key_reward/std": 0.20066367089748383, |
| "rewards/format_hoi_object_label_reward/mean": 0.6541666984558105, |
| "rewards/format_hoi_object_label_reward/std": 0.40945371985435486, |
| "rewards/format_hoi_verb_label_reward/mean": 0.3828125298023224, |
| "rewards/format_hoi_verb_label_reward/std": 0.28444916009902954, |
| "rewards/hoi_iou_reward/mean": 0.6135461330413818, |
| "rewards/hoi_iou_reward/std": 0.25517329573631287, |
| "step": 33 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0625, |
| "completions/max_length": 512.0, |
| "completions/max_terminated_length": 385.0, |
| "completions/mean_length": 204.59375, |
| "completions/mean_terminated_length": 184.10000610351562, |
| "completions/min_length": 93.0, |
| "completions/min_terminated_length": 93.0, |
| "epoch": 0.01446193109315185, |
| "frac_reward_zero_std": 0.125, |
| "grad_norm": 1.4026188850402832, |
| "kl": 0.001380591158522293, |
| "learning_rate": 2.323943661971831e-06, |
| "loss": -0.0124, |
| "num_tokens": 2792274.0, |
| "reward": 2.4142298698425293, |
| "reward_std": 0.3502156734466553, |
| "rewards/format_hoi_key_reward/mean": 0.7880208492279053, |
| "rewards/format_hoi_key_reward/std": 0.28556156158447266, |
| "rewards/format_hoi_object_label_reward/mean": 0.5703125, |
| "rewards/format_hoi_object_label_reward/std": 0.41967159509658813, |
| "rewards/format_hoi_verb_label_reward/mean": 0.4947916865348816, |
| "rewards/format_hoi_verb_label_reward/std": 0.39933720231056213, |
| "rewards/hoi_iou_reward/mean": 0.5611048936843872, |
| "rewards/hoi_iou_reward/std": 0.3223456144332886, |
| "step": 34 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.03125, |
| "completions/max_length": 512.0, |
| "completions/max_terminated_length": 489.0, |
| "completions/mean_length": 196.3125, |
| "completions/mean_terminated_length": 186.1290283203125, |
| "completions/min_length": 125.0, |
| "completions/min_terminated_length": 125.0, |
| "epoch": 0.014887282007656316, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 1.316129207611084, |
| "kl": 0.0016481104830745608, |
| "learning_rate": 2.3943661971830984e-06, |
| "loss": -0.0075, |
| "num_tokens": 2874978.0, |
| "reward": 2.8513381481170654, |
| "reward_std": 0.4244565963745117, |
| "rewards/format_hoi_key_reward/mean": 0.8733173608779907, |
| "rewards/format_hoi_key_reward/std": 0.24515916407108307, |
| "rewards/format_hoi_object_label_reward/mean": 0.7844551205635071, |
| "rewards/format_hoi_object_label_reward/std": 0.3160136938095093, |
| "rewards/format_hoi_verb_label_reward/mean": 0.42851561307907104, |
| "rewards/format_hoi_verb_label_reward/std": 0.32844552397727966, |
| "rewards/hoi_iou_reward/mean": 0.7650501728057861, |
| "rewards/hoi_iou_reward/std": 0.2422105222940445, |
| "step": 35 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0625, |
| "completions/max_length": 512.0, |
| "completions/max_terminated_length": 458.0, |
| "completions/mean_length": 212.78125, |
| "completions/mean_terminated_length": 192.83334350585938, |
| "completions/min_length": 96.0, |
| "completions/min_terminated_length": 96.0, |
| "epoch": 0.015312632922160783, |
| "frac_reward_zero_std": 0.0625, |
| "grad_norm": 1.0710198879241943, |
| "kl": 0.0023336158774327487, |
| "learning_rate": 2.4647887323943666e-06, |
| "loss": 0.0636, |
| "num_tokens": 2957965.0, |
| "reward": 2.45017147064209, |
| "reward_std": 0.37150126695632935, |
| "rewards/format_hoi_key_reward/mean": 0.7733333110809326, |
| "rewards/format_hoi_key_reward/std": 0.2864827811717987, |
| "rewards/format_hoi_object_label_reward/mean": 0.6041666865348816, |
| "rewards/format_hoi_object_label_reward/std": 0.4018549919128418, |
| "rewards/format_hoi_verb_label_reward/mean": 0.4917534589767456, |
| "rewards/format_hoi_verb_label_reward/std": 0.3289114534854889, |
| "rewards/hoi_iou_reward/mean": 0.5809179544448853, |
| "rewards/hoi_iou_reward/std": 0.2540324032306671, |
| "step": 36 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.03125, |
| "completions/max_length": 512.0, |
| "completions/max_terminated_length": 269.0, |
| "completions/mean_length": 176.375, |
| "completions/mean_terminated_length": 165.5483856201172, |
| "completions/min_length": 114.0, |
| "completions/min_terminated_length": 114.0, |
| "epoch": 0.015737983836665248, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 1.5639984607696533, |
| "kl": 0.002618325990624726, |
| "learning_rate": 2.535211267605634e-06, |
| "loss": 0.0076, |
| "num_tokens": 3040049.0, |
| "reward": 2.503605365753174, |
| "reward_std": 0.18485644459724426, |
| "rewards/format_hoi_key_reward/mean": 0.7159454822540283, |
| "rewards/format_hoi_key_reward/std": 0.33074691891670227, |
| "rewards/format_hoi_object_label_reward/mean": 0.6513621807098389, |
| "rewards/format_hoi_object_label_reward/std": 0.37841737270355225, |
| "rewards/format_hoi_verb_label_reward/mean": 0.5282986164093018, |
| "rewards/format_hoi_verb_label_reward/std": 0.37431567907333374, |
| "rewards/hoi_iou_reward/mean": 0.6079990863800049, |
| "rewards/hoi_iou_reward/std": 0.28626692295074463, |
| "step": 37 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 488.0, |
| "completions/max_terminated_length": 488.0, |
| "completions/mean_length": 187.28125, |
| "completions/mean_terminated_length": 187.28125, |
| "completions/min_length": 113.0, |
| "completions/min_terminated_length": 113.0, |
| "epoch": 0.016163334751169715, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 3.382786750793457, |
| "kl": 0.003079615533351898, |
| "learning_rate": 2.6056338028169015e-06, |
| "loss": -0.0705, |
| "num_tokens": 3122320.0, |
| "reward": 2.1021814346313477, |
| "reward_std": 0.3316464424133301, |
| "rewards/format_hoi_key_reward/mean": 0.6679166555404663, |
| "rewards/format_hoi_key_reward/std": 0.3353461027145386, |
| "rewards/format_hoi_object_label_reward/mean": 0.5670138597488403, |
| "rewards/format_hoi_object_label_reward/std": 0.4418155550956726, |
| "rewards/format_hoi_verb_label_reward/mean": 0.3420138955116272, |
| "rewards/format_hoi_verb_label_reward/std": 0.37342512607574463, |
| "rewards/hoi_iou_reward/mean": 0.5252367854118347, |
| "rewards/hoi_iou_reward/std": 0.34420183300971985, |
| "step": 38 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 265.0, |
| "completions/max_terminated_length": 265.0, |
| "completions/mean_length": 155.53125, |
| "completions/mean_terminated_length": 155.53125, |
| "completions/min_length": 119.0, |
| "completions/min_terminated_length": 119.0, |
| "epoch": 0.01658868566567418, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 3.260523796081543, |
| "kl": 0.002960343728773296, |
| "learning_rate": 2.676056338028169e-06, |
| "loss": 0.0217, |
| "num_tokens": 3203647.0, |
| "reward": 2.722006320953369, |
| "reward_std": 0.10617414861917496, |
| "rewards/format_hoi_key_reward/mean": 0.8614583015441895, |
| "rewards/format_hoi_key_reward/std": 0.23783813416957855, |
| "rewards/format_hoi_object_label_reward/mean": 0.7364583611488342, |
| "rewards/format_hoi_object_label_reward/std": 0.3555357754230499, |
| "rewards/format_hoi_verb_label_reward/mean": 0.44348961114883423, |
| "rewards/format_hoi_verb_label_reward/std": 0.21557556092739105, |
| "rewards/hoi_iou_reward/mean": 0.6806000471115112, |
| "rewards/hoi_iou_reward/std": 0.24680498242378235, |
| "step": 39 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.03125, |
| "completions/max_length": 512.0, |
| "completions/max_terminated_length": 507.0, |
| "completions/mean_length": 195.375, |
| "completions/mean_terminated_length": 185.16128540039062, |
| "completions/min_length": 116.0, |
| "completions/min_terminated_length": 116.0, |
| "epoch": 0.017014036580178648, |
| "frac_reward_zero_std": 0.0, |
| "grad_norm": 1.774012565612793, |
| "kl": 0.0033842482953332365, |
| "learning_rate": 2.746478873239437e-06, |
| "loss": -0.0124, |
| "num_tokens": 3286303.0, |
| "reward": 2.2706918716430664, |
| "reward_std": 0.18238192796707153, |
| "rewards/format_hoi_key_reward/mean": 0.71370530128479, |
| "rewards/format_hoi_key_reward/std": 0.3266391158103943, |
| "rewards/format_hoi_object_label_reward/mean": 0.5867311954498291, |
| "rewards/format_hoi_object_label_reward/std": 0.3556922376155853, |
| "rewards/format_hoi_verb_label_reward/mean": 0.3763934075832367, |
| "rewards/format_hoi_verb_label_reward/std": 0.3192271590232849, |
| "rewards/hoi_iou_reward/mean": 0.5938619375228882, |
| "rewards/hoi_iou_reward/std": 0.3091049790382385, |
| "step": 40 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 2351, |
| "num_input_tokens_seen": 3286303, |
| "num_train_epochs": 1, |
| "save_steps": 20, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|