| { | |
| "best_global_step": 100, | |
| "best_metric": 7.878235010139178e-06, | |
| "best_model_checkpoint": "models/reward-model/checkpoint-100", | |
| "epoch": 3.0, | |
| "eval_steps": 50, | |
| "global_step": 111, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "accuracy": 0.475, | |
| "epoch": 0.273972602739726, | |
| "grad_norm": 30.625, | |
| "learning_rate": 9.000000000000001e-07, | |
| "loss": 0.6891, | |
| "margin": 0.01259765625, | |
| "max_reward": 0.8927734375, | |
| "mean_reward": 0.841064453125, | |
| "min_reward": 0.78935546875, | |
| "num_tokens": 33836.0, | |
| "step": 10 | |
| }, | |
| { | |
| "accuracy": 0.7, | |
| "epoch": 0.547945205479452, | |
| "grad_norm": 31.0, | |
| "learning_rate": 1.9000000000000002e-06, | |
| "loss": 0.6556, | |
| "margin": 0.08076171875, | |
| "max_reward": 0.940087890625, | |
| "mean_reward": 0.881591796875, | |
| "min_reward": 0.823095703125, | |
| "num_tokens": 68438.0, | |
| "step": 20 | |
| }, | |
| { | |
| "accuracy": 0.875, | |
| "epoch": 0.821917808219178, | |
| "grad_norm": 42.75, | |
| "learning_rate": 2.9e-06, | |
| "loss": 0.5442, | |
| "margin": 0.348681640625, | |
| "max_reward": 1.237109375, | |
| "mean_reward": 1.0571533203125, | |
| "min_reward": 0.877197265625, | |
| "num_tokens": 102129.0, | |
| "step": 30 | |
| }, | |
| { | |
| "accuracy": 0.9473684210526315, | |
| "epoch": 1.0821917808219177, | |
| "grad_norm": 33.5, | |
| "learning_rate": 3.900000000000001e-06, | |
| "loss": 0.3195, | |
| "margin": 1.2310598273026316, | |
| "max_reward": 2.8731496710526314, | |
| "mean_reward": 2.2488820929276314, | |
| "min_reward": 1.6246145148026316, | |
| "num_tokens": 134446.0, | |
| "step": 40 | |
| }, | |
| { | |
| "accuracy": 1.0, | |
| "epoch": 1.356164383561644, | |
| "grad_norm": 0.357421875, | |
| "learning_rate": 4.9000000000000005e-06, | |
| "loss": 0.0364, | |
| "margin": 6.142855834960938, | |
| "max_reward": 5.8197265625, | |
| "mean_reward": 2.7482986450195312, | |
| "min_reward": -0.3231292724609375, | |
| "num_tokens": 168483.0, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 1.356164383561644, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 9.938376024365425e-05, | |
| "eval_margin": 12.761437618371213, | |
| "eval_max_reward": 9.329545454545455, | |
| "eval_mean_reward": 2.9488266453598486, | |
| "eval_min_reward": -3.431892163825758, | |
| "eval_num_tokens": 168483.0, | |
| "eval_runtime": 1.3469, | |
| "eval_samples_per_second": 24.501, | |
| "eval_steps_per_second": 24.501, | |
| "step": 50 | |
| }, | |
| { | |
| "accuracy": 1.0, | |
| "epoch": 1.6301369863013697, | |
| "grad_norm": 2.86102294921875e-05, | |
| "learning_rate": 4.736217705571989e-06, | |
| "loss": 0.0, | |
| "margin": 17.22467498779297, | |
| "max_reward": 11.6734375, | |
| "mean_reward": 3.0611000061035156, | |
| "min_reward": -5.551237487792969, | |
| "num_tokens": 202410.0, | |
| "step": 60 | |
| }, | |
| { | |
| "accuracy": 1.0, | |
| "epoch": 1.904109589041096, | |
| "grad_norm": 0.2412109375, | |
| "learning_rate": 3.895609305067162e-06, | |
| "loss": 0.0001, | |
| "margin": 19.885546875, | |
| "max_reward": 12.878125, | |
| "mean_reward": 2.9353515625, | |
| "min_reward": -7.007421875, | |
| "num_tokens": 236144.0, | |
| "step": 70 | |
| }, | |
| { | |
| "accuracy": 1.0, | |
| "epoch": 2.1643835616438354, | |
| "grad_norm": 0.00116729736328125, | |
| "learning_rate": 2.6929386553166165e-06, | |
| "loss": 0.0, | |
| "margin": 19.66786595394737, | |
| "max_reward": 12.293071546052632, | |
| "mean_reward": 2.4591385690789473, | |
| "min_reward": -7.374794407894737, | |
| "num_tokens": 268578.0, | |
| "step": 80 | |
| }, | |
| { | |
| "accuracy": 1.0, | |
| "epoch": 2.4383561643835616, | |
| "grad_norm": 2.396106719970703e-05, | |
| "learning_rate": 1.4402140232253486e-06, | |
| "loss": 0.0, | |
| "margin": 20.334765625, | |
| "max_reward": 12.90859375, | |
| "mean_reward": 2.7412109375, | |
| "min_reward": -7.426171875, | |
| "num_tokens": 303010.0, | |
| "step": 90 | |
| }, | |
| { | |
| "accuracy": 1.0, | |
| "epoch": 2.712328767123288, | |
| "grad_norm": 0.00677490234375, | |
| "learning_rate": 4.624291562079719e-07, | |
| "loss": 0.0, | |
| "margin": 19.507958984375, | |
| "max_reward": 12.3116943359375, | |
| "mean_reward": 2.55771484375, | |
| "min_reward": -7.1962646484375, | |
| "num_tokens": 336849.0, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 2.712328767123288, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 7.878235010139178e-06, | |
| "eval_margin": 19.414299242424242, | |
| "eval_max_reward": 12.067708333333334, | |
| "eval_mean_reward": 2.360558712121212, | |
| "eval_min_reward": -7.346590909090909, | |
| "eval_num_tokens": 336849.0, | |
| "eval_runtime": 1.3603, | |
| "eval_samples_per_second": 24.26, | |
| "eval_steps_per_second": 24.26, | |
| "step": 100 | |
| }, | |
| { | |
| "accuracy": 1.0, | |
| "epoch": 2.9863013698630136, | |
| "grad_norm": 6.866455078125e-05, | |
| "learning_rate": 1.3250310963527358e-08, | |
| "loss": 0.0, | |
| "margin": 20.3682373046875, | |
| "max_reward": 12.6564453125, | |
| "mean_reward": 2.47232666015625, | |
| "min_reward": -7.7117919921875, | |
| "num_tokens": 370214.0, | |
| "step": 110 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 111, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 50, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3167290892685312.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |