| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 48, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.328125, |
| "completions/max_length": 1024.0, |
| "completions/max_terminated_length": 1022.0, |
| "completions/mean_length": 823.9140625, |
| "completions/mean_terminated_length": 726.1976928710938, |
| "completions/min_length": 240.0, |
| "completions/min_terminated_length": 240.0, |
| "entropy": 0.10510256746783853, |
| "epoch": 0.041666666666666664, |
| "frac_reward_zero_std": 0.203125, |
| "grad_norm": 0.116986483335495, |
| "learning_rate": 0.0, |
| "loss": 0.0234, |
| "num_tokens": 477500.0, |
| "reward": 0.5213682651519775, |
| "reward_std": 0.2439471185207367, |
| "rewards/<lambda>/mean": 0.5213682651519775, |
| "rewards/<lambda>/std": 0.5060697793960571, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 0.9999539852142334, |
| "sampling/importance_sampling_ratio/min": 0.051202189177274704, |
| "sampling/sampling_logp_difference/max": 2.971972942352295, |
| "sampling/sampling_logp_difference/mean": 0.009669496677815914, |
| "step": 1, |
| "step_time": 74.34781758487225 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.34765625, |
| "completions/max_length": 1024.0, |
| "completions/max_terminated_length": 1024.0, |
| "completions/mean_length": 845.5703125, |
| "completions/mean_terminated_length": 750.4790649414062, |
| "completions/min_length": 263.0, |
| "completions/min_terminated_length": 263.0, |
| "entropy": 0.10980924824252725, |
| "epoch": 0.08333333333333333, |
| "frac_reward_zero_std": 0.171875, |
| "grad_norm": 0.10744482278823853, |
| "learning_rate": 3e-06, |
| "loss": 0.0278, |
| "num_tokens": 965576.0, |
| "reward": 0.519410252571106, |
| "reward_std": 0.3138212561607361, |
| "rewards/<lambda>/mean": 0.519410252571106, |
| "rewards/<lambda>/std": 0.506157398223877, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.000095248222351, |
| "sampling/importance_sampling_ratio/min": 0.20458395779132843, |
| "sampling/sampling_logp_difference/max": 1.586776852607727, |
| "sampling/sampling_logp_difference/mean": 0.010054240934550762, |
| "step": 2, |
| "step_time": 53.79163959249854 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.48046875, |
| "completions/max_length": 1024.0, |
| "completions/max_terminated_length": 1022.0, |
| "completions/mean_length": 882.068359375, |
| "completions/mean_terminated_length": 750.8082885742188, |
| "completions/min_length": 302.0, |
| "completions/min_terminated_length": 302.0, |
| "entropy": 0.11614924483001232, |
| "epoch": 0.125, |
| "frac_reward_zero_std": 0.109375, |
| "grad_norm": 0.10017473250627518, |
| "learning_rate": 6e-06, |
| "loss": 0.0165, |
| "num_tokens": 1480499.0, |
| "reward": 0.33390700817108154, |
| "reward_std": 0.2303066849708557, |
| "rewards/<lambda>/mean": 0.33390700817108154, |
| "rewards/<lambda>/std": 0.4804892838001251, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.0000317096710205, |
| "sampling/importance_sampling_ratio/min": 0.06684955954551697, |
| "sampling/sampling_logp_difference/max": 2.705310583114624, |
| "sampling/sampling_logp_difference/mean": 0.010464111343026161, |
| "step": 3, |
| "step_time": 57.42266962304711 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.32421875, |
| "completions/max_length": 1024.0, |
| "completions/max_terminated_length": 1022.0, |
| "completions/mean_length": 814.55859375, |
| "completions/mean_terminated_length": 714.0751342773438, |
| "completions/min_length": 345.0, |
| "completions/min_terminated_length": 345.0, |
| "entropy": 0.11400253418833017, |
| "epoch": 0.16666666666666666, |
| "frac_reward_zero_std": 0.109375, |
| "grad_norm": 0.10569294542074203, |
| "learning_rate": 9e-06, |
| "loss": 0.0248, |
| "num_tokens": 1952593.0, |
| "reward": 0.4981191158294678, |
| "reward_std": 0.31772834062576294, |
| "rewards/<lambda>/mean": 0.498119056224823, |
| "rewards/<lambda>/std": 0.5063196420669556, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.0000189542770386, |
| "sampling/importance_sampling_ratio/min": 0.14644451439380646, |
| "sampling/sampling_logp_difference/max": 1.9211087226867676, |
| "sampling/sampling_logp_difference/mean": 0.010197827592492104, |
| "step": 4, |
| "step_time": 51.03766080364585 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.388671875, |
| "completions/max_length": 1024.0, |
| "completions/max_terminated_length": 1021.0, |
| "completions/mean_length": 835.1875, |
| "completions/mean_terminated_length": 715.1437377929688, |
| "completions/min_length": 356.0, |
| "completions/min_terminated_length": 356.0, |
| "entropy": 0.12000379897654057, |
| "epoch": 0.20833333333333334, |
| "frac_reward_zero_std": 0.140625, |
| "grad_norm": 0.09225241839885712, |
| "learning_rate": 1.2e-05, |
| "loss": 0.0245, |
| "num_tokens": 2437185.0, |
| "reward": 0.4845890998840332, |
| "reward_std": 0.2662581205368042, |
| "rewards/<lambda>/mean": 0.4845891296863556, |
| "rewards/<lambda>/std": 0.5059504508972168, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 0.9999595880508423, |
| "sampling/importance_sampling_ratio/min": 0.18561066687107086, |
| "sampling/sampling_logp_difference/max": 1.6841039657592773, |
| "sampling/sampling_logp_difference/mean": 0.010071037337183952, |
| "step": 5, |
| "step_time": 53.15481134876609 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.333984375, |
| "completions/max_length": 1024.0, |
| "completions/max_terminated_length": 1020.0, |
| "completions/mean_length": 833.9453125, |
| "completions/mean_terminated_length": 744.50439453125, |
| "completions/min_length": 357.0, |
| "completions/min_terminated_length": 357.0, |
| "entropy": 0.12417639419436455, |
| "epoch": 0.25, |
| "frac_reward_zero_std": 0.234375, |
| "grad_norm": 0.08464518189430237, |
| "learning_rate": 1.5e-05, |
| "loss": 0.0223, |
| "num_tokens": 2924157.0, |
| "reward": 0.5296170711517334, |
| "reward_std": 0.2583223283290863, |
| "rewards/<lambda>/mean": 0.5296170711517334, |
| "rewards/<lambda>/std": 0.505209743976593, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 0.9999397993087769, |
| "sampling/importance_sampling_ratio/min": 0.00012402510037645698, |
| "sampling/sampling_logp_difference/max": 8.995026588439941, |
| "sampling/sampling_logp_difference/mean": 0.010162664577364922, |
| "step": 6, |
| "step_time": 62.87784644961357 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.548828125, |
| "completions/max_length": 1024.0, |
| "completions/max_terminated_length": 1024.0, |
| "completions/mean_length": 895.59375, |
| "completions/mean_terminated_length": 739.3939208984375, |
| "completions/min_length": 410.0, |
| "completions/min_terminated_length": 410.0, |
| "entropy": 0.17639623675495386, |
| "epoch": 0.2916666666666667, |
| "frac_reward_zero_std": 0.03125, |
| "grad_norm": 0.08520859479904175, |
| "learning_rate": 1.4979992127476638e-05, |
| "loss": 0.0157, |
| "num_tokens": 3444285.0, |
| "reward": 0.2455175518989563, |
| "reward_std": 0.23690563440322876, |
| "rewards/<lambda>/mean": 0.2455175369977951, |
| "rewards/<lambda>/std": 0.4428788125514984, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 0.9999836683273315, |
| "sampling/importance_sampling_ratio/min": 0.039996612817049026, |
| "sampling/sampling_logp_difference/max": 3.2189605236053467, |
| "sampling/sampling_logp_difference/mean": 0.013452851213514805, |
| "step": 7, |
| "step_time": 57.17111527174711 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.41796875, |
| "completions/max_length": 1024.0, |
| "completions/max_terminated_length": 1022.0, |
| "completions/mean_length": 863.2109375, |
| "completions/mean_terminated_length": 747.7449951171875, |
| "completions/min_length": 358.0, |
| "completions/min_terminated_length": 358.0, |
| "entropy": 0.18227362260222435, |
| "epoch": 0.3333333333333333, |
| "frac_reward_zero_std": 0.140625, |
| "grad_norm": 0.10602783411741257, |
| "learning_rate": 1.4920075260563328e-05, |
| "loss": 0.0272, |
| "num_tokens": 3950121.0, |
| "reward": 0.47941911220550537, |
| "reward_std": 0.2964603304862976, |
| "rewards/<lambda>/mean": 0.47941911220550537, |
| "rewards/<lambda>/std": 0.507050096988678, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 0.9999715685844421, |
| "sampling/importance_sampling_ratio/min": 0.2541903555393219, |
| "sampling/sampling_logp_difference/max": 1.475466251373291, |
| "sampling/sampling_logp_difference/mean": 0.013659531250596046, |
| "step": 8, |
| "step_time": 55.14558635652065 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.544921875, |
| "completions/max_length": 1024.0, |
| "completions/max_terminated_length": 1021.0, |
| "completions/mean_length": 901.974609375, |
| "completions/mean_terminated_length": 757.6094360351562, |
| "completions/min_length": 399.0, |
| "completions/min_terminated_length": 399.0, |
| "entropy": 0.18943150993436575, |
| "epoch": 0.375, |
| "frac_reward_zero_std": 0.125, |
| "grad_norm": 0.11737405508756638, |
| "learning_rate": 1.4820569081669455e-05, |
| "loss": 0.0341, |
| "num_tokens": 4495428.0, |
| "reward": 0.42956632375717163, |
| "reward_std": 0.26905590295791626, |
| "rewards/<lambda>/mean": 0.42956632375717163, |
| "rewards/<lambda>/std": 0.5035831928253174, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 0.999960720539093, |
| "sampling/importance_sampling_ratio/min": 0.015176300890743732, |
| "sampling/sampling_logp_difference/max": 4.1880202293396, |
| "sampling/sampling_logp_difference/mean": 0.01382643636316061, |
| "step": 9, |
| "step_time": 59.8278575129807 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.515625, |
| "completions/max_length": 1024.0, |
| "completions/max_terminated_length": 1024.0, |
| "completions/mean_length": 890.86328125, |
| "completions/mean_terminated_length": 749.1370849609375, |
| "completions/min_length": 323.0, |
| "completions/min_terminated_length": 323.0, |
| "entropy": 0.1795631218701601, |
| "epoch": 0.4166666666666667, |
| "frac_reward_zero_std": 0.25, |
| "grad_norm": 0.10624588280916214, |
| "learning_rate": 1.4682004499313044e-05, |
| "loss": 0.0192, |
| "num_tokens": 5042470.0, |
| "reward": 0.4718334674835205, |
| "reward_std": 0.19827762246131897, |
| "rewards/<lambda>/mean": 0.4718334972858429, |
| "rewards/<lambda>/std": 0.5064524412155151, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 0.9999367594718933, |
| "sampling/importance_sampling_ratio/min": 0.0721684917807579, |
| "sampling/sampling_logp_difference/max": 2.628751754760742, |
| "sampling/sampling_logp_difference/mean": 0.01308610662817955, |
| "step": 10, |
| "step_time": 60.118371706455946 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.654296875, |
| "completions/max_length": 1024.0, |
| "completions/max_terminated_length": 1003.0, |
| "completions/mean_length": 928.134765625, |
| "completions/mean_terminated_length": 746.6949462890625, |
| "completions/min_length": 349.0, |
| "completions/min_terminated_length": 349.0, |
| "entropy": 0.17855694890022278, |
| "epoch": 0.4583333333333333, |
| "frac_reward_zero_std": 0.125, |
| "grad_norm": 0.09456736594438553, |
| "learning_rate": 1.450512081549411e-05, |
| "loss": 0.0193, |
| "num_tokens": 5602051.0, |
| "reward": 0.3620399832725525, |
| "reward_std": 0.20572692155838013, |
| "rewards/<lambda>/mean": 0.3620400130748749, |
| "rewards/<lambda>/std": 0.4884944558143616, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 0.9999369382858276, |
| "sampling/importance_sampling_ratio/min": 0.07273312658071518, |
| "sampling/sampling_logp_difference/max": 2.6209583282470703, |
| "sampling/sampling_logp_difference/mean": 0.012828832492232323, |
| "step": 11, |
| "step_time": 58.71875632926822 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.853515625, |
| "completions/max_length": 1024.0, |
| "completions/max_terminated_length": 1023.0, |
| "completions/mean_length": 998.62890625, |
| "completions/mean_terminated_length": 850.800048828125, |
| "completions/min_length": 490.0, |
| "completions/min_terminated_length": 490.0, |
| "entropy": 0.1622396009042859, |
| "epoch": 0.5, |
| "frac_reward_zero_std": 0.015625, |
| "grad_norm": 0.08454262465238571, |
| "learning_rate": 1.4290861781198601e-05, |
| "loss": 0.0114, |
| "num_tokens": 6176933.0, |
| "reward": 0.17149432003498077, |
| "reward_std": 0.19794043898582458, |
| "rewards/<lambda>/mean": 0.17149433493614197, |
| "rewards/<lambda>/std": 0.3933511972427368, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 0.9998893737792969, |
| "sampling/importance_sampling_ratio/min": 0.1942903995513916, |
| "sampling/sampling_logp_difference/max": 1.6384012699127197, |
| "sampling/sampling_logp_difference/mean": 0.0115684624761343, |
| "step": 12, |
| "step_time": 58.48493871092796 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.79296875, |
| "completions/max_length": 1024.0, |
| "completions/max_terminated_length": 1022.0, |
| "completions/mean_length": 982.77734375, |
| "completions/mean_terminated_length": 859.4528198242188, |
| "completions/min_length": 566.0, |
| "completions/min_terminated_length": 620.0, |
| "entropy": 0.1731550320982933, |
| "epoch": 0.5416666666666666, |
| "frac_reward_zero_std": 0.078125, |
| "grad_norm": 0.07863683998584747, |
| "learning_rate": 1.4040370561078558e-05, |
| "loss": 0.0125, |
| "num_tokens": 6756187.0, |
| "reward": 0.25542140007019043, |
| "reward_std": 0.1804811656475067, |
| "rewards/<lambda>/mean": 0.25542140007019043, |
| "rewards/<lambda>/std": 0.44602400064468384, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.0000680685043335, |
| "sampling/importance_sampling_ratio/min": 0.02221822179853916, |
| "sampling/sampling_logp_difference/max": 3.806842565536499, |
| "sampling/sampling_logp_difference/mean": 0.012337762862443924, |
| "step": 13, |
| "step_time": 68.92022440582514 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.884765625, |
| "completions/max_length": 1024.0, |
| "completions/max_terminated_length": 1022.0, |
| "completions/mean_length": 1007.6796875, |
| "completions/mean_terminated_length": 882.3728637695312, |
| "completions/min_length": 559.0, |
| "completions/min_terminated_length": 559.0, |
| "entropy": 0.17464189883321524, |
| "epoch": 0.5833333333333334, |
| "frac_reward_zero_std": 0.046875, |
| "grad_norm": 0.07638600468635559, |
| "learning_rate": 1.3754983634174084e-05, |
| "loss": 0.0054, |
| "num_tokens": 7335399.0, |
| "reward": 0.23066657781600952, |
| "reward_std": 0.20333421230316162, |
| "rewards/<lambda>/mean": 0.23066657781600952, |
| "rewards/<lambda>/std": 0.43073543906211853, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 0.9999949932098389, |
| "sampling/importance_sampling_ratio/min": 0.041199441999197006, |
| "sampling/sampling_logp_difference/max": 3.189330577850342, |
| "sampling/sampling_logp_difference/mean": 0.012287369929254055, |
| "step": 14, |
| "step_time": 59.96094610914588 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.87109375, |
| "completions/max_length": 1024.0, |
| "completions/max_terminated_length": 1022.0, |
| "completions/mean_length": 996.556640625, |
| "completions/mean_terminated_length": 827.5909423828125, |
| "completions/min_length": 550.0, |
| "completions/min_terminated_length": 550.0, |
| "entropy": 0.17954212613403797, |
| "epoch": 0.625, |
| "frac_reward_zero_std": 0.0625, |
| "grad_norm": 0.08646295964717865, |
| "learning_rate": 1.3436223663219406e-05, |
| "loss": 0.0049, |
| "num_tokens": 7913092.0, |
| "reward": 0.2230292558670044, |
| "reward_std": 0.19263553619384766, |
| "rewards/<lambda>/mean": 0.2230292558670044, |
| "rewards/<lambda>/std": 0.42564159631729126, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 0.9999350309371948, |
| "sampling/importance_sampling_ratio/min": 0.011985468678176403, |
| "sampling/sampling_logp_difference/max": 4.424060344696045, |
| "sampling/sampling_logp_difference/mean": 0.01234557293355465, |
| "step": 15, |
| "step_time": 64.32212274521589 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.701171875, |
| "completions/max_length": 1024.0, |
| "completions/max_terminated_length": 1018.0, |
| "completions/mean_length": 949.470703125, |
| "completions/mean_terminated_length": 795.2483520507812, |
| "completions/min_length": 301.0, |
| "completions/min_terminated_length": 301.0, |
| "entropy": 0.17093131132423878, |
| "epoch": 0.6666666666666666, |
| "frac_reward_zero_std": 0.140625, |
| "grad_norm": 0.06887029111385345, |
| "learning_rate": 1.3085791370578364e-05, |
| "loss": 0.0115, |
| "num_tokens": 8462493.0, |
| "reward": 0.4387291669845581, |
| "reward_std": 0.2984377145767212, |
| "rewards/<lambda>/mean": 0.4387291967868805, |
| "rewards/<lambda>/std": 0.5014151334762573, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 0.9999709129333496, |
| "sampling/importance_sampling_ratio/min": 0.11177696287631989, |
| "sampling/sampling_logp_difference/max": 2.1912498474121094, |
| "sampling/sampling_logp_difference/mean": 0.011749806813895702, |
| "step": 16, |
| "step_time": 75.07067326828837 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.544921875, |
| "completions/max_length": 1024.0, |
| "completions/max_terminated_length": 1022.0, |
| "completions/mean_length": 919.498046875, |
| "completions/mean_terminated_length": 794.3648071289062, |
| "completions/min_length": 303.0, |
| "completions/min_terminated_length": 303.0, |
| "entropy": 0.16775457188487053, |
| "epoch": 0.7083333333333334, |
| "frac_reward_zero_std": 0.34375, |
| "grad_norm": 0.04024207964539528, |
| "learning_rate": 1.2705556464154755e-05, |
| "loss": 0.0184, |
| "num_tokens": 8985980.0, |
| "reward": 0.6928114295005798, |
| "reward_std": 0.2674423158168793, |
| "rewards/<lambda>/mean": 0.6928114295005798, |
| "rewards/<lambda>/std": 0.46451425552368164, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.000028371810913, |
| "sampling/importance_sampling_ratio/min": 0.008241385221481323, |
| "sampling/sampling_logp_difference/max": 4.798586845397949, |
| "sampling/sampling_logp_difference/mean": 0.011188083328306675, |
| "step": 17, |
| "step_time": 53.734655763953924 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.46875, |
| "completions/max_length": 1024.0, |
| "completions/max_terminated_length": 1024.0, |
| "completions/mean_length": 880.052734375, |
| "completions/mean_terminated_length": 753.0404663085938, |
| "completions/min_length": 305.0, |
| "completions/min_terminated_length": 305.0, |
| "entropy": 0.173905773088336, |
| "epoch": 0.75, |
| "frac_reward_zero_std": 0.234375, |
| "grad_norm": 0.08793645352125168, |
| "learning_rate": 1.2297547661691685e-05, |
| "loss": 0.0361, |
| "num_tokens": 9487111.0, |
| "reward": 0.6395045518875122, |
| "reward_std": 0.28389185667037964, |
| "rewards/<lambda>/mean": 0.6395045518875122, |
| "rewards/<lambda>/std": 0.48384764790534973, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.000001072883606, |
| "sampling/importance_sampling_ratio/min": 0.13127455115318298, |
| "sampling/sampling_logp_difference/max": 2.0304644107818604, |
| "sampling/sampling_logp_difference/mean": 0.01144577655941248, |
| "step": 18, |
| "step_time": 52.49561759829521 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.30859375, |
| "completions/max_length": 1024.0, |
| "completions/max_terminated_length": 1018.0, |
| "completions/mean_length": 767.435546875, |
| "completions/mean_terminated_length": 653.4661254882812, |
| "completions/min_length": 262.0, |
| "completions/min_terminated_length": 262.0, |
| "entropy": 0.1821697074919939, |
| "epoch": 0.7916666666666666, |
| "frac_reward_zero_std": 0.484375, |
| "grad_norm": 0.10041403770446777, |
| "learning_rate": 1.1863941866684647e-05, |
| "loss": 0.0314, |
| "num_tokens": 9947318.0, |
| "reward": 0.7617002725601196, |
| "reward_std": 0.20842789113521576, |
| "rewards/<lambda>/mean": 0.7617002725601196, |
| "rewards/<lambda>/std": 0.4287981688976288, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 0.9998761415481567, |
| "sampling/importance_sampling_ratio/min": 0.0028160586953163147, |
| "sampling/sampling_logp_difference/max": 5.872416973114014, |
| "sampling/sampling_logp_difference/mean": 0.012468406930565834, |
| "step": 19, |
| "step_time": 58.54368192702532 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.1875, |
| "completions/max_length": 1024.0, |
| "completions/max_terminated_length": 1023.0, |
| "completions/mean_length": 649.126953125, |
| "completions/mean_terminated_length": 565.0240478515625, |
| "completions/min_length": 250.0, |
| "completions/min_terminated_length": 250.0, |
| "entropy": 0.18335528578609228, |
| "epoch": 0.8333333333333334, |
| "frac_reward_zero_std": 0.53125, |
| "grad_norm": 0.041842591017484665, |
| "learning_rate": 1.1407052553659478e-05, |
| "loss": 0.0393, |
| "num_tokens": 10346455.0, |
| "reward": 0.7931854724884033, |
| "reward_std": 0.1657945215702057, |
| "rewards/<lambda>/mean": 0.7931854724884033, |
| "rewards/<lambda>/std": 0.4075835645198822, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 0.9999485611915588, |
| "sampling/importance_sampling_ratio/min": 0.001173140830360353, |
| "sampling/sampling_logp_difference/max": 6.74807071685791, |
| "sampling/sampling_logp_difference/mean": 0.012672360055148602, |
| "step": 20, |
| "step_time": 53.72001050412655 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.255859375, |
| "completions/max_length": 1024.0, |
| "completions/max_terminated_length": 1021.0, |
| "completions/mean_length": 696.78515625, |
| "completions/mean_terminated_length": 584.2781982421875, |
| "completions/min_length": 267.0, |
| "completions/min_terminated_length": 267.0, |
| "entropy": 0.18723426572978497, |
| "epoch": 0.875, |
| "frac_reward_zero_std": 0.375, |
| "grad_norm": 0.08443084359169006, |
| "learning_rate": 1.0929317424784789e-05, |
| "loss": 0.0329, |
| "num_tokens": 10777497.0, |
| "reward": 0.6964685320854187, |
| "reward_std": 0.2045532912015915, |
| "rewards/<lambda>/mean": 0.6964685320854187, |
| "rewards/<lambda>/std": 0.46325141191482544, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 0.9999812841415405, |
| "sampling/importance_sampling_ratio/min": 0.24296453595161438, |
| "sampling/sampling_logp_difference/max": 1.885258436203003, |
| "sampling/sampling_logp_difference/mean": 0.013141268864274025, |
| "step": 21, |
| "step_time": 50.97019802033901 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.2109375, |
| "completions/max_length": 1024.0, |
| "completions/max_terminated_length": 1024.0, |
| "completions/mean_length": 689.134765625, |
| "completions/mean_terminated_length": 600.2401123046875, |
| "completions/min_length": 209.0, |
| "completions/min_terminated_length": 209.0, |
| "entropy": 0.18722887616604567, |
| "epoch": 0.9166666666666666, |
| "frac_reward_zero_std": 0.4375, |
| "grad_norm": 0.07437894493341446, |
| "learning_rate": 1.043328540367617e-05, |
| "loss": 0.0363, |
| "num_tokens": 11194494.0, |
| "reward": 0.7501273155212402, |
| "reward_std": 0.20716464519500732, |
| "rewards/<lambda>/mean": 0.7501273155212402, |
| "rewards/<lambda>/std": 0.43548887968063354, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.0000048875808716, |
| "sampling/importance_sampling_ratio/min": 0.044193509966135025, |
| "sampling/sampling_logp_difference/max": 3.1191773414611816, |
| "sampling/sampling_logp_difference/mean": 0.012832986190915108, |
| "step": 22, |
| "step_time": 52.78209077939391 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.595703125, |
| "completions/max_length": 1024.0, |
| "completions/max_terminated_length": 1017.0, |
| "completions/mean_length": 874.330078125, |
| "completions/mean_terminated_length": 653.8019409179688, |
| "completions/min_length": 307.0, |
| "completions/min_terminated_length": 307.0, |
| "entropy": 0.21719548478722572, |
| "epoch": 0.9583333333333334, |
| "frac_reward_zero_std": 0.203125, |
| "grad_norm": 0.07946697622537613, |
| "learning_rate": 9.921603035785846e-06, |
| "loss": 0.029, |
| "num_tokens": 11711007.0, |
| "reward": 0.41294676065444946, |
| "reward_std": 0.21486328542232513, |
| "rewards/<lambda>/mean": 0.41294676065444946, |
| "rewards/<lambda>/std": 0.4979855418205261, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.0000637769699097, |
| "sampling/importance_sampling_ratio/min": 0.003101334674283862, |
| "sampling/sampling_logp_difference/max": 5.775922775268555, |
| "sampling/sampling_logp_difference/mean": 0.01419367827475071, |
| "step": 23, |
| "step_time": 48.530132196843624 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.7109375, |
| "completions/max_length": 1024.0, |
| "completions/max_terminated_length": 1016.0, |
| "completions/mean_length": 942.05859375, |
| "completions/mean_terminated_length": 746.4729614257812, |
| "completions/min_length": 380.0, |
| "completions/min_terminated_length": 380.0, |
| "entropy": 0.2433080393821001, |
| "epoch": 1.0, |
| "frac_reward_zero_std": 0.09375, |
| "grad_norm": 0.08330480009317398, |
| "learning_rate": 9.397000367937605e-06, |
| "loss": 0.0218, |
| "num_tokens": 12278821.0, |
| "reward": 0.2651611268520355, |
| "reward_std": 0.17255176603794098, |
| "rewards/<lambda>/mean": 0.2651611268520355, |
| "rewards/<lambda>/std": 0.44904500246047974, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.0000908374786377, |
| "sampling/importance_sampling_ratio/min": 0.04702044650912285, |
| "sampling/sampling_logp_difference/max": 3.0571727752685547, |
| "sampling/sampling_logp_difference/mean": 0.015090242959558964, |
| "step": 24, |
| "step_time": 61.07411051169038 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.244140625, |
| "completions/max_length": 1024.0, |
| "completions/max_terminated_length": 1019.0, |
| "completions/mean_length": 738.345703125, |
| "completions/mean_terminated_length": 646.0801391601562, |
| "completions/min_length": 322.0, |
| "completions/min_terminated_length": 322.0, |
| "entropy": 0.20082950219511986, |
| "epoch": 1.0416666666666667, |
| "frac_reward_zero_std": 0.421875, |
| "grad_norm": 0.09732793271541595, |
| "learning_rate": 8.862276382345772e-06, |
| "loss": 0.0474, |
| "num_tokens": 12712510.0, |
| "reward": 0.7737147808074951, |
| "reward_std": 0.24896946549415588, |
| "rewards/<lambda>/mean": 0.7737147212028503, |
| "rewards/<lambda>/std": 0.42085471749305725, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 0.9999430179595947, |
| "sampling/importance_sampling_ratio/min": 0.002486642450094223, |
| "sampling/sampling_logp_difference/max": 5.996821880340576, |
| "sampling/sampling_logp_difference/mean": 0.013192282989621162, |
| "step": 25, |
| "step_time": 51.378925789147615 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.533203125, |
| "completions/max_length": 1024.0, |
| "completions/max_terminated_length": 1024.0, |
| "completions/mean_length": 865.376953125, |
| "completions/mean_terminated_length": 684.1882934570312, |
| "completions/min_length": 385.0, |
| "completions/min_terminated_length": 385.0, |
| "entropy": 0.1902909567579627, |
| "epoch": 1.0833333333333333, |
| "frac_reward_zero_std": 0.359375, |
| "grad_norm": 0.07519173622131348, |
| "learning_rate": 8.32028406283406e-06, |
| "loss": 0.0145, |
| "num_tokens": 13210727.0, |
| "reward": 0.7598530054092407, |
| "reward_std": 0.2680090367794037, |
| "rewards/<lambda>/mean": 0.7598530054092407, |
| "rewards/<lambda>/std": 0.42980262637138367, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 0.9999756813049316, |
| "sampling/importance_sampling_ratio/min": 0.06907939910888672, |
| "sampling/sampling_logp_difference/max": 2.6724987030029297, |
| "sampling/sampling_logp_difference/mean": 0.01252746395766735, |
| "step": 26, |
| "step_time": 54.474919099360704 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.8515625, |
| "completions/max_length": 1024.0, |
| "completions/max_terminated_length": 1021.0, |
| "completions/mean_length": 978.05078125, |
| "completions/mean_terminated_length": 714.4473876953125, |
| "completions/min_length": 389.0, |
| "completions/min_terminated_length": 389.0, |
| "entropy": 0.18582966551184654, |
| "epoch": 1.125, |
| "frac_reward_zero_std": 0.28125, |
| "grad_norm": 0.0853697806596756, |
| "learning_rate": 7.773915172932443e-06, |
| "loss": 0.0036, |
| "num_tokens": 13774793.0, |
| "reward": 0.7163721919059753, |
| "reward_std": 0.2866782546043396, |
| "rewards/<lambda>/mean": 0.7163721919059753, |
| "rewards/<lambda>/std": 0.45386844873428345, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 0.9999269247055054, |
| "sampling/importance_sampling_ratio/min": 0.03673957660794258, |
| "sampling/sampling_logp_difference/max": 3.303900718688965, |
| "sampling/sampling_logp_difference/mean": 0.011891147121787071, |
| "step": 27, |
| "step_time": 59.99074776098132 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.873046875, |
| "completions/max_length": 1024.0, |
| "completions/max_terminated_length": 1024.0, |
| "completions/mean_length": 985.720703125, |
| "completions/mean_terminated_length": 722.4769287109375, |
| "completions/min_length": 410.0, |
| "completions/min_terminated_length": 410.0, |
| "entropy": 0.17072805669158697, |
| "epoch": 1.1666666666666667, |
| "frac_reward_zero_std": 0.46875, |
| "grad_norm": 0.06768080592155457, |
| "learning_rate": 7.226084827067558e-06, |
| "loss": 0.0008, |
| "num_tokens": 14334522.0, |
| "reward": 0.8005068302154541, |
| "reward_std": 0.21028977632522583, |
| "rewards/<lambda>/mean": 0.8005068302154541, |
| "rewards/<lambda>/std": 0.40284714102745056, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 0.9999420642852783, |
| "sampling/importance_sampling_ratio/min": 0.0021877989638596773, |
| "sampling/sampling_logp_difference/max": 6.12485933303833, |
| "sampling/sampling_logp_difference/mean": 0.01087690144777298, |
| "step": 28, |
| "step_time": 54.91908521205187 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.974609375, |
| "completions/max_length": 1024.0, |
| "completions/max_terminated_length": 955.0, |
| "completions/mean_length": 1017.001953125, |
| "completions/mean_terminated_length": 748.3846435546875, |
| "completions/min_length": 463.0, |
| "completions/min_terminated_length": 463.0, |
| "entropy": 0.18543414678424597, |
| "epoch": 1.2083333333333333, |
| "frac_reward_zero_std": 0.28125, |
| "grad_norm": 0.08286742866039276, |
| "learning_rate": 6.679715937165944e-06, |
| "loss": -0.0, |
| "num_tokens": 14912203.0, |
| "reward": 0.6823173761367798, |
| "reward_std": 0.28883033990859985, |
| "rewards/<lambda>/mean": 0.6823173761367798, |
| "rewards/<lambda>/std": 0.4695548117160797, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.0000499486923218, |
| "sampling/importance_sampling_ratio/min": 0.010657834820449352, |
| "sampling/sampling_logp_difference/max": 4.541460037231445, |
| "sampling/sampling_logp_difference/mean": 0.01162964478135109, |
| "step": 29, |
| "step_time": 58.209084182977676 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 1.0, |
| "completions/max_length": 1024.0, |
| "completions/max_terminated_length": 0.0, |
| "completions/mean_length": 1020.09375, |
| "completions/mean_terminated_length": 0.0, |
| "completions/min_length": 774.0, |
| "completions/min_terminated_length": 0.0, |
| "entropy": 0.1811871463432908, |
| "epoch": 1.25, |
| "frac_reward_zero_std": 0.359375, |
| "grad_norm": 0.06104443594813347, |
| "learning_rate": 6.137723617654227e-06, |
| "loss": 0.0, |
| "num_tokens": 15494483.0, |
| "reward": 0.7511924505233765, |
| "reward_std": 0.2601562440395355, |
| "rewards/<lambda>/mean": 0.7511924505233765, |
| "rewards/<lambda>/std": 0.4359346628189087, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.0000349283218384, |
| "sampling/importance_sampling_ratio/min": 6.476168437075103e-07, |
| "sampling/sampling_logp_difference/max": 14.249966621398926, |
| "sampling/sampling_logp_difference/mean": 0.011373220011591911, |
| "step": 30, |
| "step_time": 67.36348918452859 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 1.0, |
| "completions/max_length": 1024.0, |
| "completions/max_terminated_length": 0.0, |
| "completions/mean_length": 1024.0, |
| "completions/mean_terminated_length": 0.0, |
| "completions/min_length": 1024.0, |
| "completions/min_terminated_length": 0.0, |
| "entropy": 0.238006591796875, |
| "epoch": 1.2916666666666667, |
| "frac_reward_zero_std": 0.1875, |
| "grad_norm": 0.08633699268102646, |
| "learning_rate": 5.602999632062394e-06, |
| "loss": 0.0, |
| "num_tokens": 16080355.0, |
| "reward": 0.4735250473022461, |
| "reward_std": 0.24217532575130463, |
| "rewards/<lambda>/mean": 0.4735250473022461, |
| "rewards/<lambda>/std": 0.5048364400863647, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 0.9999916553497314, |
| "sampling/importance_sampling_ratio/min": 0.0006342114065773785, |
| "sampling/sampling_logp_difference/max": 7.363128185272217, |
| "sampling/sampling_logp_difference/mean": 0.013966077007353306, |
| "step": 31, |
| "step_time": 59.991970762610435 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 1.0, |
| "completions/max_length": 1024.0, |
| "completions/max_terminated_length": 0.0, |
| "completions/mean_length": 1024.0, |
| "completions/mean_terminated_length": 0.0, |
| "completions/min_length": 1024.0, |
| "completions/min_terminated_length": 0.0, |
| "entropy": 0.2205047607421875, |
| "epoch": 1.3333333333333333, |
| "frac_reward_zero_std": 0.1875, |
| "grad_norm": 0.06900659203529358, |
| "learning_rate": 5.078396964214155e-06, |
| "loss": 0.0001, |
| "num_tokens": 16668515.0, |
| "reward": 0.5699542760848999, |
| "reward_std": 0.29703187942504883, |
| "rewards/<lambda>/mean": 0.5699542760848999, |
| "rewards/<lambda>/std": 0.5019528865814209, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.0000271797180176, |
| "sampling/importance_sampling_ratio/min": 0.0721689909696579, |
| "sampling/sampling_logp_difference/max": 2.6287448406219482, |
| "sampling/sampling_logp_difference/mean": 0.01299051009118557, |
| "step": 32, |
| "step_time": 58.6862215436995 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 1.0, |
| "completions/max_length": 1024.0, |
| "completions/max_terminated_length": 0.0, |
| "completions/mean_length": 1023.203125, |
| "completions/mean_terminated_length": 0.0, |
| "completions/min_length": 973.0, |
| "completions/min_terminated_length": 0.0, |
| "entropy": 0.23412521183490753, |
| "epoch": 1.375, |
| "frac_reward_zero_std": 0.140625, |
| "grad_norm": 0.08849670737981796, |
| "learning_rate": 4.566714596323831e-06, |
| "loss": 0.0, |
| "num_tokens": 17275891.0, |
| "reward": 0.45683372020721436, |
| "reward_std": 0.2977556884288788, |
| "rewards/<lambda>/mean": 0.45683372020721436, |
| "rewards/<lambda>/std": 0.5047717094421387, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.0000598430633545, |
| "sampling/importance_sampling_ratio/min": 0.05894443392753601, |
| "sampling/sampling_logp_difference/max": 2.831160068511963, |
| "sampling/sampling_logp_difference/mean": 0.01418858952820301, |
| "step": 33, |
| "step_time": 62.67323864623904 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.998046875, |
| "completions/max_length": 1024.0, |
| "completions/max_terminated_length": 442.0, |
| "completions/mean_length": 1022.86328125, |
| "completions/mean_terminated_length": 442.0, |
| "completions/min_length": 442.0, |
| "completions/min_terminated_length": 442.0, |
| "entropy": 0.22855819575488567, |
| "epoch": 1.4166666666666667, |
| "frac_reward_zero_std": 0.140625, |
| "grad_norm": 0.059393007308244705, |
| "learning_rate": 4.0706825752152114e-06, |
| "loss": -0.0, |
| "num_tokens": 17890517.0, |
| "reward": 0.4321480095386505, |
| "reward_std": 0.22312432527542114, |
| "rewards/<lambda>/mean": 0.4321480393409729, |
| "rewards/<lambda>/std": 0.5013213157653809, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.0000094175338745, |
| "sampling/importance_sampling_ratio/min": 0.05853661522269249, |
| "sampling/sampling_logp_difference/max": 2.8381028175354004, |
| "sampling/sampling_logp_difference/mean": 0.013826340436935425, |
| "step": 34, |
| "step_time": 62.6375826895237 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 1.0, |
| "completions/max_length": 1024.0, |
| "completions/max_terminated_length": 0.0, |
| "completions/mean_length": 1024.0, |
| "completions/mean_terminated_length": 0.0, |
| "completions/min_length": 1024.0, |
| "completions/min_terminated_length": 0.0, |
| "entropy": 0.2439422607421875, |
| "epoch": 1.4583333333333333, |
| "frac_reward_zero_std": 0.125, |
| "grad_norm": 0.08650114387273788, |
| "learning_rate": 3.592947446340524e-06, |
| "loss": -0.0001, |
| "num_tokens": 18499181.0, |
| "reward": 0.36806878447532654, |
| "reward_std": 0.20019319653511047, |
| "rewards/<lambda>/mean": 0.36806878447532654, |
| "rewards/<lambda>/std": 0.4879459738731384, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 0.9999986886978149, |
| "sampling/importance_sampling_ratio/min": 0.15711112320423126, |
| "sampling/sampling_logp_difference/max": 1.850801944732666, |
| "sampling/sampling_logp_difference/mean": 0.014828124083578587, |
| "step": 35, |
| "step_time": 61.840191546827555 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 1.0, |
| "completions/max_length": 1024.0, |
| "completions/max_terminated_length": 0.0, |
| "completions/mean_length": 1024.0, |
| "completions/mean_terminated_length": 0.0, |
| "completions/min_length": 1024.0, |
| "completions/min_terminated_length": 0.0, |
| "entropy": 0.2600250244140625, |
| "epoch": 1.5, |
| "frac_reward_zero_std": 0.125, |
| "grad_norm": 0.12246455252170563, |
| "learning_rate": 3.136058133315355e-06, |
| "loss": -0.0, |
| "num_tokens": 19087053.0, |
| "reward": 0.3060392141342163, |
| "reward_std": 0.1883051097393036, |
| "rewards/<lambda>/mean": 0.3060392141342163, |
| "rewards/<lambda>/std": 0.4683455228805542, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 0.9998786449432373, |
| "sampling/importance_sampling_ratio/min": 0.0640513002872467, |
| "sampling/sampling_logp_difference/max": 2.7480709552764893, |
| "sampling/sampling_logp_difference/mean": 0.015524221584200859, |
| "step": 36, |
| "step_time": 59.04825992509723 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 1.0, |
| "completions/max_length": 1024.0, |
| "completions/max_terminated_length": 0.0, |
| "completions/mean_length": 1016.84375, |
| "completions/mean_terminated_length": 0.0, |
| "completions/min_length": 566.0, |
| "completions/min_terminated_length": 0.0, |
| "entropy": 0.2629384808242321, |
| "epoch": 1.5416666666666665, |
| "frac_reward_zero_std": 0.046875, |
| "grad_norm": 0.09859054535627365, |
| "learning_rate": 2.702452338308317e-06, |
| "loss": -0.0, |
| "num_tokens": 19683749.0, |
| "reward": 0.21034839749336243, |
| "reward_std": 0.16528862714767456, |
| "rewards/<lambda>/mean": 0.21034839749336243, |
| "rewards/<lambda>/std": 0.4158807694911957, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 0.9999657869338989, |
| "sampling/importance_sampling_ratio/min": 0.007038592826575041, |
| "sampling/sampling_logp_difference/max": 4.9563469886779785, |
| "sampling/sampling_logp_difference/mean": 0.015743985772132874, |
| "step": 37, |
| "step_time": 69.43658219277859 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 1.0, |
| "completions/max_length": 1024.0, |
| "completions/max_terminated_length": 0.0, |
| "completions/mean_length": 1024.0, |
| "completions/mean_terminated_length": 0.0, |
| "completions/min_length": 1024.0, |
| "completions/min_terminated_length": 0.0, |
| "entropy": 0.257904052734375, |
| "epoch": 1.5833333333333335, |
| "frac_reward_zero_std": 0.046875, |
| "grad_norm": 0.0776461511850357, |
| "learning_rate": 2.2944435358452453e-06, |
| "loss": -0.0001, |
| "num_tokens": 20271317.0, |
| "reward": 0.2907390892505646, |
| "reward_std": 0.2345331609249115, |
| "rewards/<lambda>/mean": 0.2907390892505646, |
| "rewards/<lambda>/std": 0.4613417983055115, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 0.9999305009841919, |
| "sampling/importance_sampling_ratio/min": 0.1541903167963028, |
| "sampling/sampling_logp_difference/max": 1.869567632675171, |
| "sampling/sampling_logp_difference/mean": 0.01533452421426773, |
| "step": 38, |
| "step_time": 60.22879173234105 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 1.0, |
| "completions/max_length": 1024.0, |
| "completions/max_terminated_length": 0.0, |
| "completions/mean_length": 1021.875, |
| "completions/mean_terminated_length": 0.0, |
| "completions/min_length": 888.0, |
| "completions/min_terminated_length": 0.0, |
| "entropy": 0.259298924356699, |
| "epoch": 1.625, |
| "frac_reward_zero_std": 0.015625, |
| "grad_norm": 0.08768095821142197, |
| "learning_rate": 1.914208629421636e-06, |
| "loss": 0.0, |
| "num_tokens": 20861973.0, |
| "reward": 0.1866101324558258, |
| "reward_std": 0.23392625153064728, |
| "rewards/<lambda>/mean": 0.1866101324558258, |
| "rewards/<lambda>/std": 0.3986544907093048, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 0.9999127388000488, |
| "sampling/importance_sampling_ratio/min": 9.618316809678618e-09, |
| "sampling/sampling_logp_difference/max": 18.459596633911133, |
| "sampling/sampling_logp_difference/mean": 0.015485625714063644, |
| "step": 39, |
| "step_time": 64.56263257935643 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 1.0, |
| "completions/max_length": 1024.0, |
| "completions/max_terminated_length": 0.0, |
| "completions/mean_length": 1017.828125, |
| "completions/mean_terminated_length": 0.0, |
| "completions/min_length": 629.0, |
| "completions/min_terminated_length": 0.0, |
| "entropy": 0.22701962105929852, |
| "epoch": 1.6666666666666665, |
| "frac_reward_zero_std": 0.0625, |
| "grad_norm": 0.14714667201042175, |
| "learning_rate": 1.563776336780595e-06, |
| "loss": -0.0, |
| "num_tokens": 21446373.0, |
| "reward": 0.3874555230140686, |
| "reward_std": 0.3079039454460144, |
| "rewards/<lambda>/mean": 0.3874555230140686, |
| "rewards/<lambda>/std": 0.49295976758003235, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 0.9999634027481079, |
| "sampling/importance_sampling_ratio/min": 0.007032718509435654, |
| "sampling/sampling_logp_difference/max": 4.957181930541992, |
| "sampling/sampling_logp_difference/mean": 0.013643273152410984, |
| "step": 40, |
| "step_time": 70.35990770533681 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 1.0, |
| "completions/max_length": 1024.0, |
| "completions/max_terminated_length": 0.0, |
| "completions/mean_length": 1024.0, |
| "completions/mean_terminated_length": 0.0, |
| "completions/min_length": 1024.0, |
| "completions/min_terminated_length": 0.0, |
| "entropy": 0.20330810546875, |
| "epoch": 1.7083333333333335, |
| "frac_reward_zero_std": 0.3125, |
| "grad_norm": 0.050596319139003754, |
| "learning_rate": 1.2450163658259165e-06, |
| "loss": -0.0001, |
| "num_tokens": 22023365.0, |
| "reward": 0.5747976303100586, |
| "reward_std": 0.23263724148273468, |
| "rewards/<lambda>/mean": 0.5747976303100586, |
| "rewards/<lambda>/std": 0.49826157093048096, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.0000696182250977, |
| "sampling/importance_sampling_ratio/min": 0.1340048760175705, |
| "sampling/sampling_logp_difference/max": 2.0098791122436523, |
| "sampling/sampling_logp_difference/mean": 0.012190128676593304, |
| "step": 41, |
| "step_time": 56.359509252011776 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 1.0, |
| "completions/max_length": 1024.0, |
| "completions/max_terminated_length": 0.0, |
| "completions/mean_length": 1024.0, |
| "completions/mean_terminated_length": 0.0, |
| "completions/min_length": 1024.0, |
| "completions/min_terminated_length": 0.0, |
| "entropy": 0.20894622802734375, |
| "epoch": 1.75, |
| "frac_reward_zero_std": 0.15625, |
| "grad_norm": 0.06408947706222534, |
| "learning_rate": 9.596294389214422e-07, |
| "loss": 0.0, |
| "num_tokens": 22598197.0, |
| "reward": 0.4889833331108093, |
| "reward_std": 0.3267000913619995, |
| "rewards/<lambda>/mean": 0.48898330330848694, |
| "rewards/<lambda>/std": 0.5056021213531494, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 0.9999138116836548, |
| "sampling/importance_sampling_ratio/min": 0.01976090297102928, |
| "sampling/sampling_logp_difference/max": 3.9240498542785645, |
| "sampling/sampling_logp_difference/mean": 0.012254208326339722, |
| "step": 42, |
| "step_time": 55.88660566881299 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 1.0, |
| "completions/max_length": 1024.0, |
| "completions/max_terminated_length": 0.0, |
| "completions/mean_length": 1023.625, |
| "completions/mean_terminated_length": 0.0, |
| "completions/min_length": 1000.0, |
| "completions/min_terminated_length": 0.0, |
| "entropy": 0.18526224978268147, |
| "epoch": 1.7916666666666665, |
| "frac_reward_zero_std": 0.421875, |
| "grad_norm": 0.05824067071080208, |
| "learning_rate": 7.091382188014004e-07, |
| "loss": 0.0, |
| "num_tokens": 23189573.0, |
| "reward": 0.6663753986358643, |
| "reward_std": 0.1958390325307846, |
| "rewards/<lambda>/mean": 0.6663753986358643, |
| "rewards/<lambda>/std": 0.4757937490940094, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.0000879764556885, |
| "sampling/importance_sampling_ratio/min": 0.02108073979616165, |
| "sampling/sampling_logp_difference/max": 3.8593955039978027, |
| "sampling/sampling_logp_difference/mean": 0.011367494240403175, |
| "step": 43, |
| "step_time": 61.6868560872972 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 1.0, |
| "completions/max_length": 1024.0, |
| "completions/max_terminated_length": 0.0, |
| "completions/mean_length": 1021.765625, |
| "completions/mean_terminated_length": 0.0, |
| "completions/min_length": 881.0, |
| "completions/min_terminated_length": 0.0, |
| "entropy": 0.1563689550384879, |
| "epoch": 1.8333333333333335, |
| "frac_reward_zero_std": 0.53125, |
| "grad_norm": 0.05948049575090408, |
| "learning_rate": 4.948791845058906e-07, |
| "loss": 0.0, |
| "num_tokens": 23779501.0, |
| "reward": 0.7701754570007324, |
| "reward_std": 0.15122410655021667, |
| "rewards/<lambda>/mean": 0.7701754570007324, |
| "rewards/<lambda>/std": 0.4250968396663666, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 0.9999828338623047, |
| "sampling/importance_sampling_ratio/min": 0.0004995743511244655, |
| "sampling/sampling_logp_difference/max": 7.601754188537598, |
| "sampling/sampling_logp_difference/mean": 0.009836211800575256, |
| "step": 44, |
| "step_time": 64.21834829077125 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 0.998046875, |
| "completions/max_length": 1024.0, |
| "completions/max_terminated_length": 692.0, |
| "completions/mean_length": 1023.3515625, |
| "completions/mean_terminated_length": 692.0, |
| "completions/min_length": 692.0, |
| "completions/min_terminated_length": 692.0, |
| "entropy": 0.1624470753595233, |
| "epoch": 1.875, |
| "frac_reward_zero_std": 0.40625, |
| "grad_norm": 0.06368126720190048, |
| "learning_rate": 3.1799550068695616e-07, |
| "loss": 0.0004, |
| "num_tokens": 24377745.0, |
| "reward": 0.645195484161377, |
| "reward_std": 0.16846325993537903, |
| "rewards/<lambda>/mean": 0.645195484161377, |
| "rewards/<lambda>/std": 0.48446959257125854, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.0000487565994263, |
| "sampling/importance_sampling_ratio/min": 0.11990867555141449, |
| "sampling/sampling_logp_difference/max": 2.1210248470306396, |
| "sampling/sampling_logp_difference/mean": 0.010421509854495525, |
| "step": 45, |
| "step_time": 61.393697403371334 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 1.0, |
| "completions/max_length": 1024.0, |
| "completions/max_terminated_length": 0.0, |
| "completions/mean_length": 1022.03125, |
| "completions/mean_terminated_length": 0.0, |
| "completions/min_length": 898.0, |
| "completions/min_terminated_length": 0.0, |
| "entropy": 0.16149251069873571, |
| "epoch": 1.9166666666666665, |
| "frac_reward_zero_std": 0.390625, |
| "grad_norm": 0.06683196127414703, |
| "learning_rate": 1.7943091833054704e-07, |
| "loss": 0.0, |
| "num_tokens": 24965185.0, |
| "reward": 0.7338901162147522, |
| "reward_std": 0.23456785082817078, |
| "rewards/<lambda>/mean": 0.7338901162147522, |
| "rewards/<lambda>/std": 0.44744160771369934, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 0.9999337196350098, |
| "sampling/importance_sampling_ratio/min": 0.07218174636363983, |
| "sampling/sampling_logp_difference/max": 2.628568172454834, |
| "sampling/sampling_logp_difference/mean": 0.010157747194170952, |
| "step": 46, |
| "step_time": 64.5227914750576 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 1.0, |
| "completions/max_length": 1024.0, |
| "completions/max_terminated_length": 0.0, |
| "completions/mean_length": 1024.0, |
| "completions/mean_terminated_length": 0.0, |
| "completions/min_length": 1024.0, |
| "completions/min_terminated_length": 0.0, |
| "entropy": 0.22264862060546875, |
| "epoch": 1.9583333333333335, |
| "frac_reward_zero_std": 0.203125, |
| "grad_norm": 0.0897730141878128, |
| "learning_rate": 7.992473943667311e-08, |
| "loss": 0.0, |
| "num_tokens": 25558329.0, |
| "reward": 0.4105945825576782, |
| "reward_std": 0.22466689348220825, |
| "rewards/<lambda>/mean": 0.4105945825576782, |
| "rewards/<lambda>/std": 0.49798643589019775, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 0.9999638199806213, |
| "sampling/importance_sampling_ratio/min": 0.060054682195186615, |
| "sampling/sampling_logp_difference/max": 2.812499761581421, |
| "sampling/sampling_logp_difference/mean": 0.013261111453175545, |
| "step": 47, |
| "step_time": 59.11356810852885 |
| }, |
| { |
| "clip_ratio/high_max": 0.0, |
| "clip_ratio/high_mean": 0.0, |
| "clip_ratio/low_mean": 0.0, |
| "clip_ratio/low_min": 0.0, |
| "clip_ratio/region_mean": 0.0, |
| "completions/clipped_ratio": 1.0, |
| "completions/max_length": 1024.0, |
| "completions/max_terminated_length": 0.0, |
| "completions/mean_length": 1022.28125, |
| "completions/mean_terminated_length": 0.0, |
| "completions/min_length": 914.0, |
| "completions/min_terminated_length": 0.0, |
| "entropy": 0.2494364334270358, |
| "epoch": 2.0, |
| "frac_reward_zero_std": 0.125, |
| "grad_norm": 0.06464344263076782, |
| "learning_rate": 2.0007872523362668e-08, |
| "loss": -0.0, |
| "num_tokens": 26167217.0, |
| "reward": 0.31925156712532043, |
| "reward_std": 0.1734967827796936, |
| "rewards/<lambda>/mean": 0.31925156712532043, |
| "rewards/<lambda>/std": 0.4741312563419342, |
| "sampling/importance_sampling_ratio/max": 2.0, |
| "sampling/importance_sampling_ratio/mean": 1.000075340270996, |
| "sampling/importance_sampling_ratio/min": 0.011130619794130325, |
| "sampling/sampling_logp_difference/max": 4.498055458068848, |
| "sampling/sampling_logp_difference/mean": 0.014584287069737911, |
| "step": 48, |
| "step_time": 63.68918735533953 |
| }, |
| { |
| "epoch": 2.0, |
| "step": 48, |
| "total_flos": 0.0, |
| "train_loss": 0.0, |
| "train_runtime": 2.496, |
| "train_samples_per_second": 1275.628, |
| "train_steps_per_second": 19.231 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 48, |
| "num_input_tokens_seen": 26167217, |
| "num_train_epochs": 2, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|