| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 63, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.16, |
| "grad_norm": 132.26618246689608, |
| "learning_rate": 4.984280524733107e-07, |
| "logits/chosen": -0.8294057250022888, |
| "logits/rejected": -1.354197382926941, |
| "logps/chosen": -33.951271057128906, |
| "logps/rejected": -229.11785888671875, |
| "loss": 0.6625, |
| "rewards/accuracies": 0.4937499761581421, |
| "rewards/chosen": 0.003963456489145756, |
| "rewards/margins": 0.09601181745529175, |
| "rewards/rejected": -0.09204836189746857, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 38.476627705158, |
| "learning_rate": 4.4545787061700746e-07, |
| "logits/chosen": -0.7441146969795227, |
| "logits/rejected": -1.32632315158844, |
| "logps/chosen": -33.42034912109375, |
| "logps/rejected": -304.5281677246094, |
| "loss": 0.3492, |
| "rewards/accuracies": 0.8437500596046448, |
| "rewards/chosen": -0.023549042642116547, |
| "rewards/margins": 2.2787868976593018, |
| "rewards/rejected": -2.3023359775543213, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 30.725862184836895, |
| "learning_rate": 3.325697654887918e-07, |
| "logits/chosen": -0.6691964268684387, |
| "logits/rejected": -1.2474281787872314, |
| "logps/chosen": -34.52278137207031, |
| "logps/rejected": -312.0245666503906, |
| "loss": 0.2683, |
| "rewards/accuracies": 0.9781250357627869, |
| "rewards/chosen": -0.27767491340637207, |
| "rewards/margins": 5.5489301681518555, |
| "rewards/rejected": -5.826605796813965, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 14.314762288047737, |
| "learning_rate": 1.9436976651092142e-07, |
| "logits/chosen": -0.6005232334136963, |
| "logits/rejected": -1.1803635358810425, |
| "logps/chosen": -43.08976745605469, |
| "logps/rejected": -356.63897705078125, |
| "loss": 0.1523, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.0808709859848022, |
| "rewards/margins": 8.968171119689941, |
| "rewards/rejected": -10.049041748046875, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 14.835498346670336, |
| "learning_rate": 7.322330470336313e-08, |
| "logits/chosen": -0.5658006072044373, |
| "logits/rejected": -1.1364529132843018, |
| "logps/chosen": -47.95671081542969, |
| "logps/rejected": -367.29815673828125, |
| "loss": 0.1166, |
| "rewards/accuracies": 0.9906249642372131, |
| "rewards/chosen": -1.539947748184204, |
| "rewards/margins": 9.938979148864746, |
| "rewards/rejected": -11.478927612304688, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 17.492078534272487, |
| "learning_rate": 6.268021954544095e-09, |
| "logits/chosen": -0.5260172486305237, |
| "logits/rejected": -1.1272974014282227, |
| "logps/chosen": -50.922725677490234, |
| "logps/rejected": -387.16107177734375, |
| "loss": 0.0769, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.94655179977417, |
| "rewards/margins": 11.078328132629395, |
| "rewards/rejected": -13.024880409240723, |
| "step": 60 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 63, |
| "total_flos": 6808570429440.0, |
| "train_loss": 0.26195360412673346, |
| "train_runtime": 1524.2794, |
| "train_samples_per_second": 1.312, |
| "train_steps_per_second": 0.041 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 63, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 6808570429440.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|