| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.3333333333333333, | |
| "eval_steps": 100, | |
| "global_step": 100, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.13333333333333333, | |
| "grad_norm": 0.5337432026863098, | |
| "learning_rate": 0.0004999965616101344, | |
| "logits/chosen": 0.23934367299079895, | |
| "logits/rejected": 0.42762428522109985, | |
| "logps/chosen": -113.45499420166016, | |
| "logps/rejected": -75.61299896240234, | |
| "loss": 0.271, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": 0.9617708325386047, | |
| "rewards/margins": 3.3194146156311035, | |
| "rewards/rejected": -2.3576436042785645, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.26666666666666666, | |
| "grad_norm": 0.0004492771113291383, | |
| "learning_rate": 0.0004999690550586881, | |
| "logits/chosen": 0.39065027236938477, | |
| "logits/rejected": 0.9774702191352844, | |
| "logps/chosen": -103.68326568603516, | |
| "logps/rejected": -159.09165954589844, | |
| "loss": 0.0008, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.5827335119247437, | |
| "rewards/margins": 12.054422378540039, | |
| "rewards/rejected": -10.471688270568848, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 0.009628095664083958, | |
| "learning_rate": 0.0004999140449822787, | |
| "logits/chosen": 0.39641499519348145, | |
| "logits/rejected": 1.1506410837173462, | |
| "logps/chosen": -112.4819564819336, | |
| "logps/rejected": -203.46591186523438, | |
| "loss": 0.0144, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": 0.7971093654632568, | |
| "rewards/margins": 15.904172897338867, | |
| "rewards/rejected": -15.107061386108398, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.5333333333333333, | |
| "grad_norm": 0.16114771366119385, | |
| "learning_rate": 0.0004998315374335394, | |
| "logits/chosen": 0.42631012201309204, | |
| "logits/rejected": 1.436499834060669, | |
| "logps/chosen": -120.41705322265625, | |
| "logps/rejected": -188.07252502441406, | |
| "loss": 0.0008, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.19870629906654358, | |
| "rewards/margins": 13.751760482788086, | |
| "rewards/rejected": -13.553054809570312, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 2.7873742510564625e-05, | |
| "learning_rate": 0.0004997215414905875, | |
| "logits/chosen": 0.45363932847976685, | |
| "logits/rejected": 1.7247793674468994, | |
| "logps/chosen": -107.3564453125, | |
| "logps/rejected": -212.8771514892578, | |
| "loss": 0.0011, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.316047191619873, | |
| "rewards/margins": 17.600120544433594, | |
| "rewards/rejected": -16.284074783325195, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 2.3321376829699147e-06, | |
| "learning_rate": 0.0004995840692560257, | |
| "logits/chosen": 0.3636583983898163, | |
| "logits/rejected": 1.690006971359253, | |
| "logps/chosen": -117.81230163574219, | |
| "logps/rejected": -231.50650024414062, | |
| "loss": 0.0005, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.40089258551597595, | |
| "rewards/margins": 18.288105010986328, | |
| "rewards/rejected": -17.8872127532959, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.9333333333333333, | |
| "grad_norm": 9.223950655723456e-06, | |
| "learning_rate": 0.0004994191358556106, | |
| "logits/chosen": 0.4559127390384674, | |
| "logits/rejected": 1.189163088798523, | |
| "logps/chosen": -108.7114486694336, | |
| "logps/rejected": -249.8244171142578, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.3001081943511963, | |
| "rewards/margins": 20.991580963134766, | |
| "rewards/rejected": -19.691471099853516, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 1.0666666666666667, | |
| "grad_norm": 1.353669034642735e-07, | |
| "learning_rate": 0.0004992267594365879, | |
| "logits/chosen": 0.617511510848999, | |
| "logits/rejected": 1.2590217590332031, | |
| "logps/chosen": -98.4377670288086, | |
| "logps/rejected": -260.7948913574219, | |
| "loss": 0.0018, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.062624216079712, | |
| "rewards/margins": 22.795236587524414, | |
| "rewards/rejected": -20.73261070251465, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "grad_norm": 7.2894854383775964e-06, | |
| "learning_rate": 0.0004990069611656963, | |
| "logits/chosen": 0.8833354115486145, | |
| "logits/rejected": 1.289421796798706, | |
| "logps/chosen": -99.6741943359375, | |
| "logps/rejected": -247.78750610351562, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.3024544715881348, | |
| "rewards/margins": 21.798070907592773, | |
| "rewards/rejected": -19.49561882019043, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 1.3333333333333333, | |
| "grad_norm": 5.883700850972673e-06, | |
| "learning_rate": 0.0004987597652268379, | |
| "logits/chosen": 0.957192599773407, | |
| "logits/rejected": 1.1422635316848755, | |
| "logps/chosen": -107.17437744140625, | |
| "logps/rejected": -239.02804565429688, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.3617346286773682, | |
| "rewards/margins": 20.076194763183594, | |
| "rewards/rejected": -18.714458465576172, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.3333333333333333, | |
| "eval_logits/chosen": 0.9479974508285522, | |
| "eval_logits/rejected": 1.0917534828186035, | |
| "eval_logps/chosen": -110.24896240234375, | |
| "eval_logps/rejected": -238.44570922851562, | |
| "eval_loss": 8.311428246088326e-05, | |
| "eval_rewards/accuracies": 1.0, | |
| "eval_rewards/chosen": 1.203629493713379, | |
| "eval_rewards/margins": 19.736553192138672, | |
| "eval_rewards/rejected": -18.532922744750977, | |
| "eval_runtime": 161.9066, | |
| "eval_samples_per_second": 1.235, | |
| "eval_steps_per_second": 1.235, | |
| "step": 100 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 3000, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 40, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |