| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 100, |
| "global_step": 54, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.09345794392523364, |
| "grad_norm": 27.934567781750822, |
| "learning_rate": 4.995258321842611e-06, |
| "logits/chosen": -1.3600707054138184, |
| "logits/rejected": -1.311720848083496, |
| "logps/chosen": -259.7884216308594, |
| "logps/rejected": -641.204833984375, |
| "loss": 0.5367130279541016, |
| "rewards/accuracies": 0.4906249940395355, |
| "rewards/chosen": -0.011346553452312946, |
| "rewards/margins": 2.010589599609375, |
| "rewards/rejected": -2.0219361782073975, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.18691588785046728, |
| "grad_norm": 33.47686763978394, |
| "learning_rate": 4.83118057351089e-06, |
| "logits/chosen": -1.4325159788131714, |
| "logits/rejected": -1.419487714767456, |
| "logps/chosen": -282.45062255859375, |
| "logps/rejected": -1006.796630859375, |
| "loss": 0.1962599277496338, |
| "rewards/accuracies": 0.8937499523162842, |
| "rewards/chosen": -1.5842396020889282, |
| "rewards/margins": 30.126033782958984, |
| "rewards/rejected": -31.710275650024414, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.2803738317757009, |
| "grad_norm": 33.03025938367781, |
| "learning_rate": 4.447701436314176e-06, |
| "logits/chosen": -1.4119861125946045, |
| "logits/rejected": -1.4033880233764648, |
| "logps/chosen": -316.49224853515625, |
| "logps/rejected": -1806.5711669921875, |
| "loss": 0.13303208351135254, |
| "rewards/accuracies": 0.9312499761581421, |
| "rewards/chosen": -3.9324283599853516, |
| "rewards/margins": 79.23664093017578, |
| "rewards/rejected": -83.16907501220703, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.37383177570093457, |
| "grad_norm": 15.588810818394203, |
| "learning_rate": 3.880912432401265e-06, |
| "logits/chosen": -1.4167428016662598, |
| "logits/rejected": -1.4375219345092773, |
| "logps/chosen": -278.1930236816406, |
| "logps/rejected": -1543.1934814453125, |
| "loss": 0.1300720453262329, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -3.5365679264068604, |
| "rewards/margins": 69.07416534423828, |
| "rewards/rejected": -72.61072540283203, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.4672897196261682, |
| "grad_norm": 23.635861216384747, |
| "learning_rate": 3.184157475180208e-06, |
| "logits/chosen": -1.5420770645141602, |
| "logits/rejected": -1.5806173086166382, |
| "logps/chosen": -289.8417053222656, |
| "logps/rejected": -1373.1412353515625, |
| "loss": 0.09687024354934692, |
| "rewards/accuracies": 0.9624999761581421, |
| "rewards/chosen": -3.474010944366455, |
| "rewards/margins": 56.75503921508789, |
| "rewards/rejected": -60.22904968261719, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.5607476635514018, |
| "grad_norm": 27.742154267321354, |
| "learning_rate": 2.4230123536095746e-06, |
| "logits/chosen": -1.456063985824585, |
| "logits/rejected": -1.4878963232040405, |
| "logps/chosen": -284.5406188964844, |
| "logps/rejected": -1384.5396728515625, |
| "loss": 0.18257052898406984, |
| "rewards/accuracies": 0.9031249284744263, |
| "rewards/chosen": -3.6529622077941895, |
| "rewards/margins": 59.20906066894531, |
| "rewards/rejected": -62.86201858520508, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.6542056074766355, |
| "grad_norm": 19.195024194064924, |
| "learning_rate": 1.6691130013008514e-06, |
| "logits/chosen": -1.478035807609558, |
| "logits/rejected": -1.4903415441513062, |
| "logps/chosen": -294.15576171875, |
| "logps/rejected": -1432.6474609375, |
| "loss": 0.14389536380767823, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -3.8707540035247803, |
| "rewards/margins": 60.74583053588867, |
| "rewards/rejected": -64.6165771484375, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.7476635514018691, |
| "grad_norm": 36.27877245192158, |
| "learning_rate": 9.934134090518593e-07, |
| "logits/chosen": -1.4703152179718018, |
| "logits/rejected": -1.4725449085235596, |
| "logps/chosen": -310.2994689941406, |
| "logps/rejected": -1594.9718017578125, |
| "loss": 0.1617864489555359, |
| "rewards/accuracies": 0.9281250238418579, |
| "rewards/chosen": -4.363249778747559, |
| "rewards/margins": 70.96905517578125, |
| "rewards/rejected": -75.33231353759766, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.8411214953271028, |
| "grad_norm": 15.093660609578944, |
| "learning_rate": 4.5950771910944603e-07, |
| "logits/chosen": -1.4355796575546265, |
| "logits/rejected": -1.439159631729126, |
| "logps/chosen": -304.1833801269531, |
| "logps/rejected": -1558.446533203125, |
| "loss": 0.13369102478027345, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -3.854429006576538, |
| "rewards/margins": 70.72245025634766, |
| "rewards/rejected": -74.5768814086914, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.9345794392523364, |
| "grad_norm": 21.304476284345572, |
| "learning_rate": 1.1764499893210879e-07, |
| "logits/chosen": -1.4929989576339722, |
| "logits/rejected": -1.5164098739624023, |
| "logps/chosen": -278.53350830078125, |
| "logps/rejected": -1530.287841796875, |
| "loss": 0.14561424255371094, |
| "rewards/accuracies": 0.9312499761581421, |
| "rewards/chosen": -3.787602663040161, |
| "rewards/margins": 66.7558822631836, |
| "rewards/rejected": -70.54348754882812, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 54, |
| "total_flos": 4121088491520.0, |
| "train_loss": 0.18359575779349716, |
| "train_runtime": 542.9915, |
| "train_samples_per_second": 6.298, |
| "train_steps_per_second": 0.099 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 54, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 4121088491520.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|