{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.017014036580178648, "eval_steps": 500, "global_step": 40, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "clip_ratio": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 512.0, "completions/max_terminated_length": 313.0, "completions/mean_length": 169.875, "completions/mean_terminated_length": 158.8386993408203, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 0.0004253509145044662, "frac_reward_zero_std": 0.0, "grad_norm": 4.363935947418213, "kl": 0.0003229373978683725, "learning_rate": 0.0, "loss": 0.0073, "num_tokens": 81770.0, "reward": 2.789294719696045, "reward_std": 0.13441388309001923, "rewards/format_hoi_key_reward/mean": 0.8359375, "rewards/format_hoi_key_reward/std": 0.2847406268119812, "rewards/format_hoi_object_label_reward/mean": 0.7421875, "rewards/format_hoi_object_label_reward/std": 0.3625374734401703, "rewards/format_hoi_verb_label_reward/mean": 0.5434027910232544, "rewards/format_hoi_verb_label_reward/std": 0.38580045104026794, "rewards/hoi_iou_reward/mean": 0.6677669286727905, "rewards/hoi_iou_reward/std": 0.27127814292907715, "step": 1 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 468.0, "completions/max_terminated_length": 468.0, "completions/mean_length": 172.28125, "completions/mean_terminated_length": 172.28125, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.0008507018290089324, "frac_reward_zero_std": 0.0, "grad_norm": 1.752140760421753, "kl": 0.000335300101141911, "learning_rate": 7.042253521126761e-08, "loss": -0.0022, "num_tokens": 163613.0, "reward": 2.4476234912872314, "reward_std": 0.17685432732105255, "rewards/format_hoi_key_reward/mean": 0.7478471994400024, "rewards/format_hoi_key_reward/std": 0.3051668405532837, "rewards/format_hoi_object_label_reward/mean": 0.6697916984558105, "rewards/format_hoi_object_label_reward/std": 0.34505072236061096, "rewards/format_hoi_verb_label_reward/mean": 0.39635416865348816, "rewards/format_hoi_verb_label_reward/std": 0.2153073251247406, "rewards/hoi_iou_reward/mean": 0.6336303353309631, "rewards/hoi_iou_reward/std": 0.3029536306858063, "step": 2 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 512.0, "completions/max_terminated_length": 487.0, "completions/mean_length": 186.03125, "completions/mean_terminated_length": 175.51612854003906, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 0.0012760527435133986, "frac_reward_zero_std": 0.0, "grad_norm": 1.0201789140701294, "kl": 0.0003579963668016717, "learning_rate": 1.4084507042253522e-07, "loss": -0.0078, "num_tokens": 245910.0, "reward": 2.2469096183776855, "reward_std": 0.21650519967079163, "rewards/format_hoi_key_reward/mean": 0.7998958826065063, "rewards/format_hoi_key_reward/std": 0.2879995107650757, "rewards/format_hoi_object_label_reward/mean": 0.4833333492279053, "rewards/format_hoi_object_label_reward/std": 0.4311150908470154, "rewards/format_hoi_verb_label_reward/mean": 0.44062501192092896, "rewards/format_hoi_verb_label_reward/std": 0.39745914936065674, "rewards/hoi_iou_reward/mean": 0.5230554342269897, "rewards/hoi_iou_reward/std": 0.3132159411907196, "step": 3 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 457.0, "completions/max_terminated_length": 457.0, "completions/mean_length": 186.4375, "completions/mean_terminated_length": 186.4375, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 0.0017014036580178648, "frac_reward_zero_std": 0.0, "grad_norm": 1.2351148128509521, "kl": 0.0003153797151753679, "learning_rate": 2.1126760563380284e-07, "loss": 0.0413, "num_tokens": 328192.0, "reward": 3.0791516304016113, "reward_std": 0.1785963922739029, "rewards/format_hoi_key_reward/mean": 0.8968750238418579, "rewards/format_hoi_key_reward/std": 0.19876126945018768, "rewards/format_hoi_object_label_reward/mean": 0.8411458730697632, "rewards/format_hoi_object_label_reward/std": 0.30258694291114807, "rewards/format_hoi_verb_label_reward/mean": 0.5755208730697632, "rewards/format_hoi_verb_label_reward/std": 0.3113258183002472, "rewards/hoi_iou_reward/mean": 0.7656100392341614, "rewards/hoi_iou_reward/std": 0.240717351436615, "step": 4 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 512.0, "completions/max_terminated_length": 336.0, "completions/mean_length": 191.0, "completions/mean_terminated_length": 180.64515686035156, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 0.002126754572522331, "frac_reward_zero_std": 0.0, "grad_norm": 1.201839566230774, "kl": 0.00034184849937446415, "learning_rate": 2.8169014084507043e-07, "loss": 0.0003, "num_tokens": 410578.0, "reward": 2.123943328857422, "reward_std": 0.16979998350143433, "rewards/format_hoi_key_reward/mean": 0.6126735806465149, "rewards/format_hoi_key_reward/std": 0.3427790701389313, "rewards/format_hoi_object_label_reward/mean": 0.6137152910232544, "rewards/format_hoi_object_label_reward/std": 0.3750321567058563, "rewards/format_hoi_verb_label_reward/mean": 0.36812394857406616, "rewards/format_hoi_verb_label_reward/std": 0.27230551838874817, "rewards/hoi_iou_reward/mean": 0.5294303297996521, "rewards/hoi_iou_reward/std": 0.3228149712085724, "step": 5 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 230.0, "completions/max_terminated_length": 230.0, "completions/mean_length": 157.15625, "completions/mean_terminated_length": 157.15625, "completions/min_length": 88.0, "completions/min_terminated_length": 88.0, "epoch": 0.002552105487026797, "frac_reward_zero_std": 0.0, "grad_norm": 1.502875566482544, "kl": 0.00034246582072228193, "learning_rate": 3.521126760563381e-07, "loss": -0.0042, "num_tokens": 491951.0, "reward": 2.6239867210388184, "reward_std": 0.3057432472705841, "rewards/format_hoi_key_reward/mean": 0.8656250238418579, "rewards/format_hoi_key_reward/std": 0.23455658555030823, "rewards/format_hoi_object_label_reward/mean": 0.6927083730697632, "rewards/format_hoi_object_label_reward/std": 0.38824430108070374, "rewards/format_hoi_verb_label_reward/mean": 0.3932291567325592, "rewards/format_hoi_verb_label_reward/std": 0.3944879472255707, "rewards/hoi_iou_reward/mean": 0.6724243760108948, "rewards/hoi_iou_reward/std": 0.2741568088531494, "step": 6 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 512.0, "completions/max_terminated_length": 310.0, "completions/mean_length": 179.71875, "completions/mean_terminated_length": 157.56668090820312, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 0.0029774564015312634, "frac_reward_zero_std": 0.0625, "grad_norm": 1.2773802280426025, "kl": 0.00039098386332625523, "learning_rate": 4.225352112676057e-07, "loss": 0.021, "num_tokens": 574042.0, "reward": 2.446112632751465, "reward_std": 0.2966364324092865, "rewards/format_hoi_key_reward/mean": 0.8067708015441895, "rewards/format_hoi_key_reward/std": 0.31369173526763916, "rewards/format_hoi_object_label_reward/mean": 0.6536458730697632, "rewards/format_hoi_object_label_reward/std": 0.4338444769382477, "rewards/format_hoi_verb_label_reward/mean": 0.3489583432674408, "rewards/format_hoi_verb_label_reward/std": 0.3668850064277649, "rewards/hoi_iou_reward/mean": 0.6367375254631042, "rewards/hoi_iou_reward/std": 0.2808148264884949, "step": 7 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 512.0, "completions/max_terminated_length": 420.0, "completions/mean_length": 241.1875, "completions/mean_terminated_length": 223.1333465576172, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.0034028073160357296, "frac_reward_zero_std": 0.0625, "grad_norm": 1.0549662113189697, "kl": 0.0003542311387718655, "learning_rate": 4.929577464788733e-07, "loss": -0.027, "num_tokens": 658152.0, "reward": 2.2133703231811523, "reward_std": 0.4084068238735199, "rewards/format_hoi_key_reward/mean": 0.6832291483879089, "rewards/format_hoi_key_reward/std": 0.29706627130508423, "rewards/format_hoi_object_label_reward/mean": 0.5901042222976685, "rewards/format_hoi_object_label_reward/std": 0.34433093667030334, "rewards/format_hoi_verb_label_reward/mean": 0.4224702715873718, "rewards/format_hoi_verb_label_reward/std": 0.32381853461265564, "rewards/hoi_iou_reward/mean": 0.5175668001174927, "rewards/hoi_iou_reward/std": 0.26935523748397827, "step": 8 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 512.0, "completions/max_terminated_length": 339.0, "completions/mean_length": 196.46875, "completions/mean_terminated_length": 175.433349609375, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 0.0038281582305401958, "frac_reward_zero_std": 0.0625, "grad_norm": 1.110564112663269, "kl": 0.0004391309848870151, "learning_rate": 5.633802816901409e-07, "loss": -0.0017, "num_tokens": 740859.0, "reward": 2.465003728866577, "reward_std": 0.0937172994017601, "rewards/format_hoi_key_reward/mean": 0.7511160373687744, "rewards/format_hoi_key_reward/std": 0.32644686102867126, "rewards/format_hoi_object_label_reward/mean": 0.6443452835083008, "rewards/format_hoi_object_label_reward/std": 0.4073486030101776, "rewards/format_hoi_verb_label_reward/mean": 0.4927455186843872, "rewards/format_hoi_verb_label_reward/std": 0.3828698992729187, "rewards/hoi_iou_reward/mean": 0.5767968893051147, "rewards/hoi_iou_reward/std": 0.30675631761550903, "step": 9 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 347.0, "completions/max_terminated_length": 347.0, "completions/mean_length": 172.09375, "completions/mean_terminated_length": 172.09375, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 0.004253509145044662, "frac_reward_zero_std": 0.0, "grad_norm": 2.442121982574463, "kl": 0.00042213514825562015, "learning_rate": 6.338028169014085e-07, "loss": -0.0037, "num_tokens": 822690.0, "reward": 2.751133441925049, "reward_std": 0.33584073185920715, "rewards/format_hoi_key_reward/mean": 0.8541666269302368, "rewards/format_hoi_key_reward/std": 0.21394065022468567, "rewards/format_hoi_object_label_reward/mean": 0.7239583730697632, "rewards/format_hoi_object_label_reward/std": 0.3409331738948822, "rewards/format_hoi_verb_label_reward/mean": 0.5394965410232544, "rewards/format_hoi_verb_label_reward/std": 0.3380621373653412, "rewards/hoi_iou_reward/mean": 0.6335119009017944, "rewards/hoi_iou_reward/std": 0.2383546382188797, "step": 10 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 254.0, "completions/max_terminated_length": 254.0, "completions/mean_length": 144.0625, "completions/mean_terminated_length": 144.0625, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 0.004678860059549128, "frac_reward_zero_std": 0.0, "grad_norm": 1.577789545059204, "kl": 0.0003447915078140795, "learning_rate": 7.042253521126762e-07, "loss": 0.0014, "num_tokens": 903614.0, "reward": 2.294086456298828, "reward_std": 0.05743589997291565, "rewards/format_hoi_key_reward/mean": 0.7256410121917725, "rewards/format_hoi_key_reward/std": 0.3094317615032196, "rewards/format_hoi_object_label_reward/mean": 0.6041666865348816, "rewards/format_hoi_object_label_reward/std": 0.39372313022613525, "rewards/format_hoi_verb_label_reward/mean": 0.3736979365348816, "rewards/format_hoi_verb_label_reward/std": 0.3025956153869629, "rewards/hoi_iou_reward/mean": 0.5905807614326477, "rewards/hoi_iou_reward/std": 0.29480627179145813, "step": 11 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.09375, "completions/max_length": 512.0, "completions/max_terminated_length": 441.0, "completions/mean_length": 208.03125, "completions/mean_terminated_length": 176.58621215820312, "completions/min_length": 97.0, "completions/min_terminated_length": 97.0, "epoch": 0.005104210974053594, "frac_reward_zero_std": 0.0625, "grad_norm": 0.8127485513687134, "kl": 0.0003554098366294056, "learning_rate": 7.746478873239437e-07, "loss": -0.0117, "num_tokens": 986589.0, "reward": 2.294464349746704, "reward_std": 0.20235002040863037, "rewards/format_hoi_key_reward/mean": 0.7207965850830078, "rewards/format_hoi_key_reward/std": 0.35986757278442383, "rewards/format_hoi_object_label_reward/mean": 0.5450674295425415, "rewards/format_hoi_object_label_reward/std": 0.42823418974876404, "rewards/format_hoi_verb_label_reward/mean": 0.44454658031463623, "rewards/format_hoi_verb_label_reward/std": 0.4109439253807068, "rewards/hoi_iou_reward/mean": 0.5840538144111633, "rewards/hoi_iou_reward/std": 0.35957613587379456, "step": 12 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 299.0, "completions/max_terminated_length": 299.0, "completions/mean_length": 172.75, "completions/mean_terminated_length": 172.75, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.00552956188855806, "frac_reward_zero_std": 0.0, "grad_norm": 1.269765019416809, "kl": 0.00036870952317258343, "learning_rate": 8.450704225352114e-07, "loss": -0.0155, "num_tokens": 1068533.0, "reward": 2.4297869205474854, "reward_std": 0.2929306924343109, "rewards/format_hoi_key_reward/mean": 0.796875, "rewards/format_hoi_key_reward/std": 0.2455495446920395, "rewards/format_hoi_object_label_reward/mean": 0.5703125, "rewards/format_hoi_object_label_reward/std": 0.4022279679775238, "rewards/format_hoi_verb_label_reward/mean": 0.4661458432674408, "rewards/format_hoi_verb_label_reward/std": 0.33458054065704346, "rewards/hoi_iou_reward/mean": 0.5964536070823669, "rewards/hoi_iou_reward/std": 0.2549525201320648, "step": 13 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.15625, "completions/max_length": 512.0, "completions/max_terminated_length": 441.0, "completions/mean_length": 233.03125, "completions/mean_terminated_length": 181.37037658691406, "completions/min_length": 92.0, "completions/min_terminated_length": 92.0, "epoch": 0.005954912803062527, "frac_reward_zero_std": 0.125, "grad_norm": 0.9530596733093262, "kl": 0.00036477607500273734, "learning_rate": 9.154929577464789e-07, "loss": 0.0057, "num_tokens": 1152368.0, "reward": 2.0490849018096924, "reward_std": 0.3710783123970032, "rewards/format_hoi_key_reward/mean": 0.6766666173934937, "rewards/format_hoi_key_reward/std": 0.3744644820690155, "rewards/format_hoi_object_label_reward/mean": 0.5354166626930237, "rewards/format_hoi_object_label_reward/std": 0.4118267297744751, "rewards/format_hoi_verb_label_reward/mean": 0.30980902910232544, "rewards/format_hoi_verb_label_reward/std": 0.3180212080478668, "rewards/hoi_iou_reward/mean": 0.5271925330162048, "rewards/hoi_iou_reward/std": 0.33255496621131897, "step": 14 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 512.0, "completions/max_terminated_length": 332.0, "completions/mean_length": 183.96875, "completions/mean_terminated_length": 173.3870849609375, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 0.0063802637175669925, "frac_reward_zero_std": 0.0, "grad_norm": 1.3094820976257324, "kl": 0.0003903634860762395, "learning_rate": 9.859154929577465e-07, "loss": -0.0348, "num_tokens": 1234625.0, "reward": 2.416910171508789, "reward_std": 0.39547234773635864, "rewards/format_hoi_key_reward/mean": 0.8177083730697632, "rewards/format_hoi_key_reward/std": 0.25537019968032837, "rewards/format_hoi_object_label_reward/mean": 0.703125, "rewards/format_hoi_object_label_reward/std": 0.3103194534778595, "rewards/format_hoi_verb_label_reward/mean": 0.3838541507720947, "rewards/format_hoi_verb_label_reward/std": 0.3097499907016754, "rewards/hoi_iou_reward/mean": 0.5122226476669312, "rewards/hoi_iou_reward/std": 0.2652962803840637, "step": 15 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 512.0, "completions/max_terminated_length": 510.0, "completions/mean_length": 191.9375, "completions/mean_terminated_length": 181.61289978027344, "completions/min_length": 111.0, "completions/min_terminated_length": 111.0, "epoch": 0.006805614632071459, "frac_reward_zero_std": 0.0, "grad_norm": 1.2858763933181763, "kl": 0.00034383483580313623, "learning_rate": 1.0563380281690142e-06, "loss": 0.0097, "num_tokens": 1317153.0, "reward": 2.2493948936462402, "reward_std": 0.25483453273773193, "rewards/format_hoi_key_reward/mean": 0.7226041555404663, "rewards/format_hoi_key_reward/std": 0.266787588596344, "rewards/format_hoi_object_label_reward/mean": 0.5197916626930237, "rewards/format_hoi_object_label_reward/std": 0.395753413438797, "rewards/format_hoi_verb_label_reward/mean": 0.4555555582046509, "rewards/format_hoi_verb_label_reward/std": 0.34589242935180664, "rewards/hoi_iou_reward/mean": 0.5514433979988098, "rewards/hoi_iou_reward/std": 0.31066155433654785, "step": 16 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 273.0, "completions/max_terminated_length": 273.0, "completions/mean_length": 143.3125, "completions/mean_terminated_length": 143.3125, "completions/min_length": 94.0, "completions/min_terminated_length": 94.0, "epoch": 0.007230965546575925, "frac_reward_zero_std": 0.0, "grad_norm": 1.596779704093933, "kl": 0.00033400646498193964, "learning_rate": 1.1267605633802817e-06, "loss": 0.0058, "num_tokens": 1398085.0, "reward": 2.553602695465088, "reward_std": 0.11417173594236374, "rewards/format_hoi_key_reward/mean": 0.8300297856330872, "rewards/format_hoi_key_reward/std": 0.3036980926990509, "rewards/format_hoi_object_label_reward/mean": 0.710565447807312, "rewards/format_hoi_object_label_reward/std": 0.3981569707393646, "rewards/format_hoi_verb_label_reward/mean": 0.3980654776096344, "rewards/format_hoi_verb_label_reward/std": 0.3377469480037689, "rewards/hoi_iou_reward/mean": 0.6149418354034424, "rewards/hoi_iou_reward/std": 0.28151780366897583, "step": 17 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 512.0, "completions/max_terminated_length": 202.0, "completions/mean_length": 150.15625, "completions/mean_terminated_length": 138.48387145996094, "completions/min_length": 90.0, "completions/min_terminated_length": 90.0, "epoch": 0.0076563164610803916, "frac_reward_zero_std": 0.0, "grad_norm": 2.1925482749938965, "kl": 0.0003761410553124733, "learning_rate": 1.1971830985915492e-06, "loss": 0.0275, "num_tokens": 1479104.0, "reward": 2.4335920810699463, "reward_std": 0.24886168539524078, "rewards/format_hoi_key_reward/mean": 0.7729166746139526, "rewards/format_hoi_key_reward/std": 0.3151136040687561, "rewards/format_hoi_object_label_reward/mean": 0.6536458134651184, "rewards/format_hoi_object_label_reward/std": 0.4061078131198883, "rewards/format_hoi_verb_label_reward/mean": 0.4140625, "rewards/format_hoi_verb_label_reward/std": 0.34423333406448364, "rewards/hoi_iou_reward/mean": 0.59296715259552, "rewards/hoi_iou_reward/std": 0.3096284866333008, "step": 18 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 512.0, "completions/max_terminated_length": 447.0, "completions/mean_length": 217.34375, "completions/mean_terminated_length": 197.70001220703125, "completions/min_length": 118.0, "completions/min_terminated_length": 118.0, "epoch": 0.008081667375584857, "frac_reward_zero_std": 0.0, "grad_norm": 2.192422389984131, "kl": 0.0004282266672817059, "learning_rate": 1.267605633802817e-06, "loss": 0.0079, "num_tokens": 1562405.0, "reward": 2.197598457336426, "reward_std": 0.26950523257255554, "rewards/format_hoi_key_reward/mean": 0.6578124761581421, "rewards/format_hoi_key_reward/std": 0.3557007312774658, "rewards/format_hoi_object_label_reward/mean": 0.59375, "rewards/format_hoi_object_label_reward/std": 0.3958510160446167, "rewards/format_hoi_verb_label_reward/mean": 0.41914063692092896, "rewards/format_hoi_verb_label_reward/std": 0.334130197763443, "rewards/hoi_iou_reward/mean": 0.52689528465271, "rewards/hoi_iou_reward/std": 0.31697896122932434, "step": 19 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 512.0, "completions/max_terminated_length": 456.0, "completions/mean_length": 167.5625, "completions/mean_terminated_length": 156.4516143798828, "completions/min_length": 91.0, "completions/min_terminated_length": 91.0, "epoch": 0.008507018290089324, "frac_reward_zero_std": 0.0, "grad_norm": 9.545456886291504, "kl": 0.0003430663564358838, "learning_rate": 1.3380281690140844e-06, "loss": 0.0305, "num_tokens": 1644081.0, "reward": 2.449803590774536, "reward_std": 0.3938855230808258, "rewards/format_hoi_key_reward/mean": 0.7368229031562805, "rewards/format_hoi_key_reward/std": 0.29059892892837524, "rewards/format_hoi_object_label_reward/mean": 0.676562488079071, "rewards/format_hoi_object_label_reward/std": 0.3538031578063965, "rewards/format_hoi_verb_label_reward/mean": 0.4554687738418579, "rewards/format_hoi_verb_label_reward/std": 0.31711632013320923, "rewards/hoi_iou_reward/mean": 0.5809494853019714, "rewards/hoi_iou_reward/std": 0.25384804606437683, "step": 20 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 236.0, "completions/max_terminated_length": 236.0, "completions/mean_length": 153.0625, "completions/mean_terminated_length": 153.0625, "completions/min_length": 104.0, "completions/min_terminated_length": 104.0, "epoch": 0.00893236920459379, "frac_reward_zero_std": 0.0, "grad_norm": 1.9242279529571533, "kl": 0.00033866508601931855, "learning_rate": 1.4084507042253523e-06, "loss": -0.0036, "num_tokens": 1725353.0, "reward": 2.697577476501465, "reward_std": 0.22112613916397095, "rewards/format_hoi_key_reward/mean": 0.8525000214576721, "rewards/format_hoi_key_reward/std": 0.22024911642074585, "rewards/format_hoi_object_label_reward/mean": 0.6875, "rewards/format_hoi_object_label_reward/std": 0.3544646203517914, "rewards/format_hoi_verb_label_reward/mean": 0.5083333253860474, "rewards/format_hoi_verb_label_reward/std": 0.3730144500732422, "rewards/hoi_iou_reward/mean": 0.6492440104484558, "rewards/hoi_iou_reward/std": 0.2220328450202942, "step": 21 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 466.0, "completions/max_terminated_length": 466.0, "completions/mean_length": 199.53125, "completions/mean_terminated_length": 199.53125, "completions/min_length": 117.0, "completions/min_terminated_length": 117.0, "epoch": 0.009357720119098255, "frac_reward_zero_std": 0.0, "grad_norm": 2.485565662384033, "kl": 0.0004189620740362443, "learning_rate": 1.4788732394366198e-06, "loss": -0.0042, "num_tokens": 1808072.0, "reward": 2.559481143951416, "reward_std": 0.10840541124343872, "rewards/format_hoi_key_reward/mean": 0.8031938076019287, "rewards/format_hoi_key_reward/std": 0.27561086416244507, "rewards/format_hoi_object_label_reward/mean": 0.6789750456809998, "rewards/format_hoi_object_label_reward/std": 0.39399468898773193, "rewards/format_hoi_verb_label_reward/mean": 0.45625001192092896, "rewards/format_hoi_verb_label_reward/std": 0.32801535725593567, "rewards/hoi_iou_reward/mean": 0.6210623979568481, "rewards/hoi_iou_reward/std": 0.300423264503479, "step": 22 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 456.0, "completions/max_terminated_length": 456.0, "completions/mean_length": 190.40625, "completions/mean_terminated_length": 190.40625, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 0.009783071033602722, "frac_reward_zero_std": 0.0, "grad_norm": 1.3170195817947388, "kl": 0.000440634525148198, "learning_rate": 1.5492957746478873e-06, "loss": 0.0079, "num_tokens": 1890443.0, "reward": 2.659435510635376, "reward_std": 0.15746591985225677, "rewards/format_hoi_key_reward/mean": 0.8373958468437195, "rewards/format_hoi_key_reward/std": 0.26145681738853455, "rewards/format_hoi_object_label_reward/mean": 0.6739583611488342, "rewards/format_hoi_object_label_reward/std": 0.437386691570282, "rewards/format_hoi_verb_label_reward/mean": 0.504687488079071, "rewards/format_hoi_verb_label_reward/std": 0.35861727595329285, "rewards/hoi_iou_reward/mean": 0.6433938145637512, "rewards/hoi_iou_reward/std": 0.31976941227912903, "step": 23 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 280.0, "completions/max_terminated_length": 280.0, "completions/mean_length": 157.5, "completions/mean_terminated_length": 157.5, "completions/min_length": 108.0, "completions/min_terminated_length": 108.0, "epoch": 0.010208421948107189, "frac_reward_zero_std": 0.0, "grad_norm": 1.0314154624938965, "kl": 0.00044616637751460075, "learning_rate": 1.6197183098591552e-06, "loss": -0.0025, "num_tokens": 1971841.0, "reward": 2.6582393646240234, "reward_std": 0.18655748665332794, "rewards/format_hoi_key_reward/mean": 0.84375, "rewards/format_hoi_key_reward/std": 0.25374436378479004, "rewards/format_hoi_object_label_reward/mean": 0.6328125, "rewards/format_hoi_object_label_reward/std": 0.4136229455471039, "rewards/format_hoi_verb_label_reward/mean": 0.4991319477558136, "rewards/format_hoi_verb_label_reward/std": 0.35674116015434265, "rewards/hoi_iou_reward/mean": 0.6825448274612427, "rewards/hoi_iou_reward/std": 0.2595166563987732, "step": 24 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 512.0, "completions/max_terminated_length": 339.0, "completions/mean_length": 180.28125, "completions/mean_terminated_length": 158.1666717529297, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.010633772862611655, "frac_reward_zero_std": 0.0, "grad_norm": 11.281386375427246, "kl": 0.0005614817346213385, "learning_rate": 1.6901408450704227e-06, "loss": 0.0657, "num_tokens": 2053992.0, "reward": 2.5150856971740723, "reward_std": 0.24478863179683685, "rewards/format_hoi_key_reward/mean": 0.809374988079071, "rewards/format_hoi_key_reward/std": 0.3469045162200928, "rewards/format_hoi_object_label_reward/mean": 0.70703125, "rewards/format_hoi_object_label_reward/std": 0.4348819851875305, "rewards/format_hoi_verb_label_reward/mean": 0.38359373807907104, "rewards/format_hoi_verb_label_reward/std": 0.3953370153903961, "rewards/hoi_iou_reward/mean": 0.6150857210159302, "rewards/hoi_iou_reward/std": 0.3287210464477539, "step": 25 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 348.0, "completions/max_terminated_length": 348.0, "completions/mean_length": 151.59375, "completions/mean_terminated_length": 151.59375, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 0.01105912377711612, "frac_reward_zero_std": 0.0, "grad_norm": 1.4701417684555054, "kl": 0.0005035337235312909, "learning_rate": 1.7605633802816902e-06, "loss": 0.0044, "num_tokens": 2135251.0, "reward": 2.886725902557373, "reward_std": 0.2123258411884308, "rewards/format_hoi_key_reward/mean": 0.8578125238418579, "rewards/format_hoi_key_reward/std": 0.2075624167919159, "rewards/format_hoi_object_label_reward/mean": 0.7526041865348816, "rewards/format_hoi_object_label_reward/std": 0.3613770306110382, "rewards/format_hoi_verb_label_reward/mean": 0.5438988208770752, "rewards/format_hoi_verb_label_reward/std": 0.3702835440635681, "rewards/hoi_iou_reward/mean": 0.7324104309082031, "rewards/hoi_iou_reward/std": 0.2421286553144455, "step": 26 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 512.0, "completions/max_terminated_length": 334.0, "completions/mean_length": 167.8125, "completions/mean_terminated_length": 156.7096710205078, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 0.011484474691620587, "frac_reward_zero_std": 0.0, "grad_norm": 1.1934653520584106, "kl": 0.0005954405351076275, "learning_rate": 1.8309859154929579e-06, "loss": -0.032, "num_tokens": 2216967.0, "reward": 2.394422769546509, "reward_std": 0.42784780263900757, "rewards/format_hoi_key_reward/mean": 0.761805534362793, "rewards/format_hoi_key_reward/std": 0.296843022108078, "rewards/format_hoi_object_label_reward/mean": 0.6618055701255798, "rewards/format_hoi_object_label_reward/std": 0.4076228737831116, "rewards/format_hoi_verb_label_reward/mean": 0.3853406012058258, "rewards/format_hoi_verb_label_reward/std": 0.3449983298778534, "rewards/hoi_iou_reward/mean": 0.5854711532592773, "rewards/hoi_iou_reward/std": 0.2725910246372223, "step": 27 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 512.0, "completions/max_terminated_length": 473.0, "completions/mean_length": 178.34375, "completions/mean_terminated_length": 167.5806427001953, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.011909825606125054, "frac_reward_zero_std": 0.0, "grad_norm": 1.208022952079773, "kl": 0.0006310369935818017, "learning_rate": 1.9014084507042254e-06, "loss": -0.034, "num_tokens": 2299060.0, "reward": 2.214604377746582, "reward_std": 0.31437236070632935, "rewards/format_hoi_key_reward/mean": 0.7563762664794922, "rewards/format_hoi_key_reward/std": 0.346075177192688, "rewards/format_hoi_object_label_reward/mean": 0.4952651560306549, "rewards/format_hoi_object_label_reward/std": 0.47521334886550903, "rewards/format_hoi_verb_label_reward/mean": 0.40388256311416626, "rewards/format_hoi_verb_label_reward/std": 0.3852638006210327, "rewards/hoi_iou_reward/mean": 0.5590803623199463, "rewards/hoi_iou_reward/std": 0.3093617558479309, "step": 28 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 450.0, "completions/max_terminated_length": 450.0, "completions/mean_length": 174.78125, "completions/mean_terminated_length": 174.78125, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 0.01233517652062952, "frac_reward_zero_std": 0.0, "grad_norm": 1.393183946609497, "kl": 0.0007882892386987805, "learning_rate": 1.971830985915493e-06, "loss": -0.024, "num_tokens": 2380987.0, "reward": 2.2392172813415527, "reward_std": 0.3153127431869507, "rewards/format_hoi_key_reward/mean": 0.7479861378669739, "rewards/format_hoi_key_reward/std": 0.3090425729751587, "rewards/format_hoi_object_label_reward/mean": 0.5729166865348816, "rewards/format_hoi_object_label_reward/std": 0.4102790057659149, "rewards/format_hoi_verb_label_reward/mean": 0.3237847089767456, "rewards/format_hoi_verb_label_reward/std": 0.2613273561000824, "rewards/hoi_iou_reward/mean": 0.5945297479629517, "rewards/hoi_iou_reward/std": 0.3360900580883026, "step": 29 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 311.0, "completions/max_terminated_length": 311.0, "completions/mean_length": 157.1875, "completions/mean_terminated_length": 157.1875, "completions/min_length": 95.0, "completions/min_terminated_length": 95.0, "epoch": 0.012760527435133985, "frac_reward_zero_std": 0.0, "grad_norm": 1.3999539613723755, "kl": 0.0008022689580684528, "learning_rate": 2.0422535211267608e-06, "loss": -0.0204, "num_tokens": 2462487.0, "reward": 2.1738319396972656, "reward_std": 0.19202688336372375, "rewards/format_hoi_key_reward/mean": 0.7598214149475098, "rewards/format_hoi_key_reward/std": 0.29326102137565613, "rewards/format_hoi_object_label_reward/mean": 0.5598958730697632, "rewards/format_hoi_object_label_reward/std": 0.41382598876953125, "rewards/format_hoi_verb_label_reward/mean": 0.3598484992980957, "rewards/format_hoi_verb_label_reward/std": 0.3271215260028839, "rewards/hoi_iou_reward/mean": 0.49426594376564026, "rewards/hoi_iou_reward/std": 0.2569417953491211, "step": 30 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 512.0, "completions/max_terminated_length": 492.0, "completions/mean_length": 213.78125, "completions/mean_terminated_length": 204.16128540039062, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 0.013185878349638452, "frac_reward_zero_std": 0.0, "grad_norm": 5.476845741271973, "kl": 0.000950784218730405, "learning_rate": 2.1126760563380285e-06, "loss": 0.004, "num_tokens": 2545612.0, "reward": 2.5632033348083496, "reward_std": 0.5354008078575134, "rewards/format_hoi_key_reward/mean": 0.7911458015441895, "rewards/format_hoi_key_reward/std": 0.23706629872322083, "rewards/format_hoi_object_label_reward/mean": 0.7135416865348816, "rewards/format_hoi_object_label_reward/std": 0.32581403851509094, "rewards/format_hoi_verb_label_reward/mean": 0.4348958432674408, "rewards/format_hoi_verb_label_reward/std": 0.3128693997859955, "rewards/hoi_iou_reward/mean": 0.6236201524734497, "rewards/hoi_iou_reward/std": 0.2516784965991974, "step": 31 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 512.0, "completions/max_terminated_length": 274.0, "completions/mean_length": 174.40625, "completions/mean_terminated_length": 163.51612854003906, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 0.013611229264142918, "frac_reward_zero_std": 0.0, "grad_norm": 2.7625715732574463, "kl": 0.001197469318867661, "learning_rate": 2.1830985915492958e-06, "loss": -0.0273, "num_tokens": 2627593.0, "reward": 2.5554096698760986, "reward_std": 0.28477969765663147, "rewards/format_hoi_key_reward/mean": 0.784375011920929, "rewards/format_hoi_key_reward/std": 0.2782413065433502, "rewards/format_hoi_object_label_reward/mean": 0.65625, "rewards/format_hoi_object_label_reward/std": 0.38553017377853394, "rewards/format_hoi_verb_label_reward/mean": 0.49926215410232544, "rewards/format_hoi_verb_label_reward/std": 0.38366368412971497, "rewards/hoi_iou_reward/mean": 0.6155223846435547, "rewards/hoi_iou_reward/std": 0.3186318576335907, "step": 32 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 333.0, "completions/max_terminated_length": 333.0, "completions/mean_length": 167.9375, "completions/mean_terminated_length": 167.9375, "completions/min_length": 109.0, "completions/min_terminated_length": 109.0, "epoch": 0.014036580178647383, "frac_reward_zero_std": 0.0, "grad_norm": 2.134929656982422, "kl": 0.0012024562747683376, "learning_rate": 2.2535211267605635e-06, "loss": -0.0295, "num_tokens": 2709249.0, "reward": 2.4919838905334473, "reward_std": 0.2809957265853882, "rewards/format_hoi_key_reward/mean": 0.8414583206176758, "rewards/format_hoi_key_reward/std": 0.20066367089748383, "rewards/format_hoi_object_label_reward/mean": 0.6541666984558105, "rewards/format_hoi_object_label_reward/std": 0.40945371985435486, "rewards/format_hoi_verb_label_reward/mean": 0.3828125298023224, "rewards/format_hoi_verb_label_reward/std": 0.28444916009902954, "rewards/hoi_iou_reward/mean": 0.6135461330413818, "rewards/hoi_iou_reward/std": 0.25517329573631287, "step": 33 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 512.0, "completions/max_terminated_length": 385.0, "completions/mean_length": 204.59375, "completions/mean_terminated_length": 184.10000610351562, "completions/min_length": 93.0, "completions/min_terminated_length": 93.0, "epoch": 0.01446193109315185, "frac_reward_zero_std": 0.125, "grad_norm": 1.4026188850402832, "kl": 0.001380591158522293, "learning_rate": 2.323943661971831e-06, "loss": -0.0124, "num_tokens": 2792274.0, "reward": 2.4142298698425293, "reward_std": 0.3502156734466553, "rewards/format_hoi_key_reward/mean": 0.7880208492279053, "rewards/format_hoi_key_reward/std": 0.28556156158447266, "rewards/format_hoi_object_label_reward/mean": 0.5703125, "rewards/format_hoi_object_label_reward/std": 0.41967159509658813, "rewards/format_hoi_verb_label_reward/mean": 0.4947916865348816, "rewards/format_hoi_verb_label_reward/std": 0.39933720231056213, "rewards/hoi_iou_reward/mean": 0.5611048936843872, "rewards/hoi_iou_reward/std": 0.3223456144332886, "step": 34 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 512.0, "completions/max_terminated_length": 489.0, "completions/mean_length": 196.3125, "completions/mean_terminated_length": 186.1290283203125, "completions/min_length": 125.0, "completions/min_terminated_length": 125.0, "epoch": 0.014887282007656316, "frac_reward_zero_std": 0.0, "grad_norm": 1.316129207611084, "kl": 0.0016481104830745608, "learning_rate": 2.3943661971830984e-06, "loss": -0.0075, "num_tokens": 2874978.0, "reward": 2.8513381481170654, "reward_std": 0.4244565963745117, "rewards/format_hoi_key_reward/mean": 0.8733173608779907, "rewards/format_hoi_key_reward/std": 0.24515916407108307, "rewards/format_hoi_object_label_reward/mean": 0.7844551205635071, "rewards/format_hoi_object_label_reward/std": 0.3160136938095093, "rewards/format_hoi_verb_label_reward/mean": 0.42851561307907104, "rewards/format_hoi_verb_label_reward/std": 0.32844552397727966, "rewards/hoi_iou_reward/mean": 0.7650501728057861, "rewards/hoi_iou_reward/std": 0.2422105222940445, "step": 35 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0625, "completions/max_length": 512.0, "completions/max_terminated_length": 458.0, "completions/mean_length": 212.78125, "completions/mean_terminated_length": 192.83334350585938, "completions/min_length": 96.0, "completions/min_terminated_length": 96.0, "epoch": 0.015312632922160783, "frac_reward_zero_std": 0.0625, "grad_norm": 1.0710198879241943, "kl": 0.0023336158774327487, "learning_rate": 2.4647887323943666e-06, "loss": 0.0636, "num_tokens": 2957965.0, "reward": 2.45017147064209, "reward_std": 0.37150126695632935, "rewards/format_hoi_key_reward/mean": 0.7733333110809326, "rewards/format_hoi_key_reward/std": 0.2864827811717987, "rewards/format_hoi_object_label_reward/mean": 0.6041666865348816, "rewards/format_hoi_object_label_reward/std": 0.4018549919128418, "rewards/format_hoi_verb_label_reward/mean": 0.4917534589767456, "rewards/format_hoi_verb_label_reward/std": 0.3289114534854889, "rewards/hoi_iou_reward/mean": 0.5809179544448853, "rewards/hoi_iou_reward/std": 0.2540324032306671, "step": 36 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 512.0, "completions/max_terminated_length": 269.0, "completions/mean_length": 176.375, "completions/mean_terminated_length": 165.5483856201172, "completions/min_length": 114.0, "completions/min_terminated_length": 114.0, "epoch": 0.015737983836665248, "frac_reward_zero_std": 0.0, "grad_norm": 1.5639984607696533, "kl": 0.002618325990624726, "learning_rate": 2.535211267605634e-06, "loss": 0.0076, "num_tokens": 3040049.0, "reward": 2.503605365753174, "reward_std": 0.18485644459724426, "rewards/format_hoi_key_reward/mean": 0.7159454822540283, "rewards/format_hoi_key_reward/std": 0.33074691891670227, "rewards/format_hoi_object_label_reward/mean": 0.6513621807098389, "rewards/format_hoi_object_label_reward/std": 0.37841737270355225, "rewards/format_hoi_verb_label_reward/mean": 0.5282986164093018, "rewards/format_hoi_verb_label_reward/std": 0.37431567907333374, "rewards/hoi_iou_reward/mean": 0.6079990863800049, "rewards/hoi_iou_reward/std": 0.28626692295074463, "step": 37 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 488.0, "completions/max_terminated_length": 488.0, "completions/mean_length": 187.28125, "completions/mean_terminated_length": 187.28125, "completions/min_length": 113.0, "completions/min_terminated_length": 113.0, "epoch": 0.016163334751169715, "frac_reward_zero_std": 0.0, "grad_norm": 3.382786750793457, "kl": 0.003079615533351898, "learning_rate": 2.6056338028169015e-06, "loss": -0.0705, "num_tokens": 3122320.0, "reward": 2.1021814346313477, "reward_std": 0.3316464424133301, "rewards/format_hoi_key_reward/mean": 0.6679166555404663, "rewards/format_hoi_key_reward/std": 0.3353461027145386, "rewards/format_hoi_object_label_reward/mean": 0.5670138597488403, "rewards/format_hoi_object_label_reward/std": 0.4418155550956726, "rewards/format_hoi_verb_label_reward/mean": 0.3420138955116272, "rewards/format_hoi_verb_label_reward/std": 0.37342512607574463, "rewards/hoi_iou_reward/mean": 0.5252367854118347, "rewards/hoi_iou_reward/std": 0.34420183300971985, "step": 38 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.0, "completions/max_length": 265.0, "completions/max_terminated_length": 265.0, "completions/mean_length": 155.53125, "completions/mean_terminated_length": 155.53125, "completions/min_length": 119.0, "completions/min_terminated_length": 119.0, "epoch": 0.01658868566567418, "frac_reward_zero_std": 0.0, "grad_norm": 3.260523796081543, "kl": 0.002960343728773296, "learning_rate": 2.676056338028169e-06, "loss": 0.0217, "num_tokens": 3203647.0, "reward": 2.722006320953369, "reward_std": 0.10617414861917496, "rewards/format_hoi_key_reward/mean": 0.8614583015441895, "rewards/format_hoi_key_reward/std": 0.23783813416957855, "rewards/format_hoi_object_label_reward/mean": 0.7364583611488342, "rewards/format_hoi_object_label_reward/std": 0.3555357754230499, "rewards/format_hoi_verb_label_reward/mean": 0.44348961114883423, "rewards/format_hoi_verb_label_reward/std": 0.21557556092739105, "rewards/hoi_iou_reward/mean": 0.6806000471115112, "rewards/hoi_iou_reward/std": 0.24680498242378235, "step": 39 }, { "clip_ratio": 0.0, "completions/clipped_ratio": 0.03125, "completions/max_length": 512.0, "completions/max_terminated_length": 507.0, "completions/mean_length": 195.375, "completions/mean_terminated_length": 185.16128540039062, "completions/min_length": 116.0, "completions/min_terminated_length": 116.0, "epoch": 0.017014036580178648, "frac_reward_zero_std": 0.0, "grad_norm": 1.774012565612793, "kl": 0.0033842482953332365, "learning_rate": 2.746478873239437e-06, "loss": -0.0124, "num_tokens": 3286303.0, "reward": 2.2706918716430664, "reward_std": 0.18238192796707153, "rewards/format_hoi_key_reward/mean": 0.71370530128479, "rewards/format_hoi_key_reward/std": 0.3266391158103943, "rewards/format_hoi_object_label_reward/mean": 0.5867311954498291, "rewards/format_hoi_object_label_reward/std": 0.3556922376155853, "rewards/format_hoi_verb_label_reward/mean": 0.3763934075832367, "rewards/format_hoi_verb_label_reward/std": 0.3192271590232849, "rewards/hoi_iou_reward/mean": 0.5938619375228882, "rewards/hoi_iou_reward/std": 0.3091049790382385, "step": 40 } ], "logging_steps": 1.0, "max_steps": 2351, "num_input_tokens_seen": 3286303, "num_train_epochs": 1, "save_steps": 20, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }