| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9893390191897654, | |
| "eval_steps": 500, | |
| "global_step": 58, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 395.3694381713867, | |
| "epoch": 0.017057569296375266, | |
| "grad_norm": 0.24042215943336487, | |
| "kl": 0.0, | |
| "learning_rate": 5e-07, | |
| "loss": 0.048, | |
| "reward": 1.1350447088479996, | |
| "reward_std": 0.5315618552267551, | |
| "rewards/sum_reward": 1.1350447088479996, | |
| "step": 1 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 400.4135227203369, | |
| "epoch": 0.08528784648187633, | |
| "grad_norm": 0.23158641159534454, | |
| "kl": 0.0001577436923980713, | |
| "learning_rate": 2.5e-06, | |
| "loss": 0.0631, | |
| "reward": 1.006835974752903, | |
| "reward_std": 0.5563086168840528, | |
| "rewards/sum_reward": 1.006835974752903, | |
| "step": 5 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 384.19622192382815, | |
| "epoch": 0.17057569296375266, | |
| "grad_norm": 161.1280059814453, | |
| "kl": 4.255555725097656, | |
| "learning_rate": 2.956412726139078e-06, | |
| "loss": 0.1142, | |
| "reward": 1.0516741514205932, | |
| "reward_std": 0.5486123532056808, | |
| "rewards/sum_reward": 1.0516741514205932, | |
| "step": 10 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 370.5531402587891, | |
| "epoch": 0.255863539445629, | |
| "grad_norm": 0.3267284035682678, | |
| "kl": 0.02659912109375, | |
| "learning_rate": 2.7836719084521715e-06, | |
| "loss": 0.0595, | |
| "reward": 1.1354911133646965, | |
| "reward_std": 0.5183316260576248, | |
| "rewards/sum_reward": 1.1354911133646965, | |
| "step": 15 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 355.5297027587891, | |
| "epoch": 0.3411513859275053, | |
| "grad_norm": 0.5259718894958496, | |
| "kl": 0.028741455078125, | |
| "learning_rate": 2.4946839873611927e-06, | |
| "loss": 0.0472, | |
| "reward": 1.1906250610947609, | |
| "reward_std": 0.5139306262135506, | |
| "rewards/sum_reward": 1.1906250610947609, | |
| "step": 20 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 372.9276954650879, | |
| "epoch": 0.42643923240938164, | |
| "grad_norm": 0.9172742366790771, | |
| "kl": 0.0341339111328125, | |
| "learning_rate": 2.1156192081791355e-06, | |
| "loss": 0.0427, | |
| "reward": 1.1500000417232514, | |
| "reward_std": 0.5094686262309551, | |
| "rewards/sum_reward": 1.1500000417232514, | |
| "step": 25 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 379.5109550476074, | |
| "epoch": 0.511727078891258, | |
| "grad_norm": 0.38280367851257324, | |
| "kl": 0.038134765625, | |
| "learning_rate": 1.6808050203829845e-06, | |
| "loss": 0.0477, | |
| "reward": 1.1505580887198448, | |
| "reward_std": 0.4839449178427458, | |
| "rewards/sum_reward": 1.1505580887198448, | |
| "step": 30 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 366.76407928466796, | |
| "epoch": 0.5970149253731343, | |
| "grad_norm": 0.34553492069244385, | |
| "kl": 0.04346923828125, | |
| "learning_rate": 1.2296174432791415e-06, | |
| "loss": 0.0465, | |
| "reward": 1.1466518342494965, | |
| "reward_std": 0.49975207149982454, | |
| "rewards/sum_reward": 1.1466518342494965, | |
| "step": 35 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 356.1212215423584, | |
| "epoch": 0.6823027718550106, | |
| "grad_norm": 0.8228252530097961, | |
| "kl": 0.0513427734375, | |
| "learning_rate": 8.029152419343472e-07, | |
| "loss": 0.0707, | |
| "reward": 1.1147321864962578, | |
| "reward_std": 0.5065454341471195, | |
| "rewards/sum_reward": 1.1147321864962578, | |
| "step": 40 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 384.7980072021484, | |
| "epoch": 0.767590618336887, | |
| "grad_norm": 1.0524908304214478, | |
| "kl": 0.070587158203125, | |
| "learning_rate": 4.3933982822017883e-07, | |
| "loss": 0.116, | |
| "reward": 1.1046875521540642, | |
| "reward_std": 0.5838725090026855, | |
| "rewards/sum_reward": 1.1046875521540642, | |
| "step": 45 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 399.55358734130857, | |
| "epoch": 0.8528784648187633, | |
| "grad_norm": 0.8683111667633057, | |
| "kl": 0.08956298828125, | |
| "learning_rate": 1.718159615201853e-07, | |
| "loss": 0.1583, | |
| "reward": 1.021428617835045, | |
| "reward_std": 0.6321207888424396, | |
| "rewards/sum_reward": 1.021428617835045, | |
| "step": 50 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 401.3645248413086, | |
| "epoch": 0.9381663113006397, | |
| "grad_norm": 0.8170827627182007, | |
| "kl": 0.095947265625, | |
| "learning_rate": 2.4570139579284723e-08, | |
| "loss": 0.1683, | |
| "reward": 1.0492187902331351, | |
| "reward_std": 0.6428232848644256, | |
| "rewards/sum_reward": 1.0492187902331351, | |
| "step": 55 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 401.34786860148114, | |
| "epoch": 0.9893390191897654, | |
| "kl": 0.09938557942708333, | |
| "reward": 1.052455407877763, | |
| "reward_std": 0.6334689930081367, | |
| "rewards/sum_reward": 1.052455407877763, | |
| "step": 58, | |
| "total_flos": 0.0, | |
| "train_loss": 0.08800288617354014, | |
| "train_runtime": 13838.5765, | |
| "train_samples_per_second": 0.542, | |
| "train_steps_per_second": 0.004 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 58, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |