{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9874476987447699,
  "eval_steps": 500,
  "global_step": 59,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.016736401673640166,
      "grad_norm": 3.1506166106625195,
      "learning_rate": 1.6666666666666664e-08,
      "logits/chosen": -0.16345731914043427,
      "logits/rejected": -0.16345731914043427,
      "logps/chosen": -1051.055419921875,
      "logps/pi_response": -1051.055419921875,
      "logps/ref_response": -1051.055419921875,
      "logps/rejected": -1051.055419921875,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.16736401673640167,
      "grad_norm": 34.149713113458255,
      "learning_rate": 9.860114570402053e-08,
      "logits/chosen": -0.17107006907463074,
      "logits/rejected": -0.17107006907463074,
      "logps/chosen": -1237.6085205078125,
      "logps/pi_response": -1237.5712890625,
      "logps/ref_response": -1047.64404296875,
      "logps/rejected": -1237.6085205078125,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -1.8992481231689453,
      "rewards/margins": 0.0,
      "rewards/rejected": -1.8992481231689453,
      "step": 10
    },
    {
      "epoch": 0.33472803347280333,
      "grad_norm": 89.9356708413712,
      "learning_rate": 8.374915007591053e-08,
      "logits/chosen": -0.8497294187545776,
      "logits/rejected": -0.8497294187545776,
      "logps/chosen": -6867.37353515625,
      "logps/pi_response": -6867.2529296875,
      "logps/ref_response": -1047.505615234375,
      "logps/rejected": -6867.37353515625,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -58.19902420043945,
      "rewards/margins": 0.0,
      "rewards/rejected": -58.19902420043945,
      "step": 20
    },
    {
      "epoch": 0.502092050209205,
      "grad_norm": 93.52643691744206,
      "learning_rate": 5.738232820012406e-08,
      "logits/chosen": -1.5894163846969604,
      "logits/rejected": -1.5894163846969604,
      "logps/chosen": -19478.552734375,
      "logps/pi_response": -19478.759765625,
      "logps/ref_response": -1048.0611572265625,
      "logps/rejected": -19478.552734375,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -184.30494689941406,
      "rewards/margins": 0.0,
      "rewards/rejected": -184.30494689941406,
      "step": 30
    },
    {
      "epoch": 0.6694560669456067,
      "grad_norm": 68.26794136278654,
      "learning_rate": 2.8496739886173992e-08,
      "logits/chosen": -1.8664979934692383,
      "logits/rejected": -1.8664979934692383,
      "logps/chosen": -30769.759765625,
      "logps/pi_response": -30769.974609375,
      "logps/ref_response": -1046.073486328125,
      "logps/rejected": -30769.759765625,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -297.2370300292969,
      "rewards/margins": 0.0,
      "rewards/rejected": -297.2370300292969,
      "step": 40
    },
    {
      "epoch": 0.8368200836820083,
      "grad_norm": 67.0066669202387,
      "learning_rate": 6.947819411632222e-09,
      "logits/chosen": -2.048996686935425,
      "logits/rejected": -2.048996686935425,
      "logps/chosen": -35588.2109375,
      "logps/pi_response": -35588.0078125,
      "logps/ref_response": -1048.0765380859375,
      "logps/rejected": -35588.2109375,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": -345.4007263183594,
      "rewards/margins": 0.0,
      "rewards/rejected": -345.4007263183594,
      "step": 50
    },
    {
      "epoch": 0.9874476987447699,
      "step": 59,
      "total_flos": 0.0,
      "train_loss": 0.6931423080169549,
      "train_runtime": 1323.6379,
      "train_samples_per_second": 11.546,
      "train_steps_per_second": 0.045
    }
  ],
  "logging_steps": 10,
  "max_steps": 59,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}