{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 225,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.1342281879194631,
      "grad_norm": 36.80321502685547,
      "kl": 0.20292969048023224,
      "learning_rate": 9e-08,
      "logits/chosen": -3776512.0,
      "logits/rejected": 1030144.0,
      "logps/chosen": -345.825,
      "logps/rejected": -421.15,
      "loss": 0.5017,
      "num_unsafe": 0.5,
      "rewards/chosen": -0.005364990234375,
      "rewards/margins": -0.013800048828125,
      "rewards/rejected": 0.00843505859375,
      "step": 10
    },
    {
      "epoch": 0.2684563758389262,
      "grad_norm": 39.34665298461914,
      "kl": 0.10800781100988388,
      "learning_rate": 1.8999999999999998e-07,
      "logits/chosen": 8857600.0,
      "logits/rejected": 8883404.8,
      "logps/chosen": -342.925,
      "logps/rejected": -384.2,
      "loss": 0.4963,
      "num_unsafe": 0.699999988079071,
      "rewards/chosen": 0.0006647109985351562,
      "rewards/margins": 0.030388832092285156,
      "rewards/rejected": -0.02972412109375,
      "step": 20
    },
    {
      "epoch": 0.40268456375838924,
      "grad_norm": 30.005146026611328,
      "kl": 0.03125,
      "learning_rate": 2.9e-07,
      "logits/chosen": 11644723.2,
      "logits/rejected": 16304537.6,
      "logps/chosen": -346.1,
      "logps/rejected": -386.15,
      "loss": 0.4873,
      "num_unsafe": 0.6499999761581421,
      "rewards/chosen": 0.016363525390625,
      "rewards/margins": 0.10656433105468749,
      "rewards/rejected": -0.0902008056640625,
      "step": 30
    },
    {
      "epoch": 0.5369127516778524,
      "grad_norm": 26.460351943969727,
      "kl": 0.0,
      "learning_rate": 3.8999999999999997e-07,
      "logits/chosen": 2756300.8,
      "logits/rejected": 3816652.8,
      "logps/chosen": -378.05,
      "logps/rejected": -397.2,
      "loss": 0.4541,
      "num_unsafe": 0.6000000238418579,
      "rewards/chosen": -0.070062255859375,
      "rewards/margins": 0.396392822265625,
      "rewards/rejected": -0.466455078125,
      "step": 40
    },
    {
      "epoch": 0.6711409395973155,
      "grad_norm": 20.090633392333984,
      "kl": 0.0,
      "learning_rate": 4.9e-07,
      "logits/chosen": 23447142.4,
      "logits/rejected": 26068582.4,
      "logps/chosen": -372.375,
      "logps/rejected": -427.3,
      "loss": 0.4115,
      "num_unsafe": 0.25,
      "rewards/chosen": -0.18045654296875,
      "rewards/margins": 0.9100219726562501,
      "rewards/rejected": -1.090478515625,
      "step": 50
    },
    {
      "epoch": 0.8053691275167785,
      "grad_norm": 29.12238121032715,
      "kl": 0.0,
      "learning_rate": 5.9e-07,
      "logits/chosen": 5931827.2,
      "logits/rejected": 13681459.2,
      "logps/chosen": -318.65,
      "logps/rejected": -371.35,
      "loss": 0.3601,
      "num_unsafe": 0.4000000059604645,
      "rewards/chosen": 0.175537109375,
      "rewards/margins": 1.456591796875,
      "rewards/rejected": -1.2810546875,
      "step": 60
    },
    {
      "epoch": 0.9395973154362416,
      "grad_norm": 13.03518009185791,
      "kl": 0.0,
      "learning_rate": 6.9e-07,
      "logits/chosen": 5972787.2,
      "logits/rejected": 2752512.0,
      "logps/chosen": -301.875,
      "logps/rejected": -392.0,
      "loss": 0.2818,
      "num_unsafe": 0.6000000238418579,
      "rewards/chosen": 0.56591796875,
      "rewards/margins": 2.639208984375,
      "rewards/rejected": -2.073291015625,
      "step": 70
    },
    {
      "epoch": 1.0671140939597314,
      "grad_norm": 19.125137329101562,
      "kl": 0.00657894741743803,
      "learning_rate": 7.9e-07,
      "logits/chosen": 4748773.052631579,
      "logits/rejected": 10441135.157894736,
      "logps/chosen": -334.7631578947368,
      "logps/rejected": -413.6842105263158,
      "loss": 0.2538,
      "num_unsafe": 0.5789473652839661,
      "rewards/chosen": 0.8779296875,
      "rewards/margins": 3.0099198190789473,
      "rewards/rejected": -2.1319901315789473,
      "step": 80
    },
    {
      "epoch": 1.2013422818791946,
      "grad_norm": 21.394241333007812,
      "kl": 0.18906250596046448,
      "learning_rate": 8.9e-07,
      "logits/chosen": 8029593.6,
      "logits/rejected": 4953088.0,
      "logps/chosen": -333.6625,
      "logps/rejected": -412.95,
      "loss": 0.209,
      "num_unsafe": 0.699999988079071,
      "rewards/chosen": 1.444970703125,
      "rewards/margins": 3.844580078125,
      "rewards/rejected": -2.399609375,
      "step": 90
    },
    {
      "epoch": 1.3355704697986577,
      "grad_norm": 8.02198314666748,
      "kl": 0.20468750596046448,
      "learning_rate": 9.9e-07,
      "logits/chosen": 8033792.0,
      "logits/rejected": 13608140.8,
      "logps/chosen": -271.025,
      "logps/rejected": -416.6,
      "loss": 0.1474,
      "num_unsafe": 0.6000000238418579,
      "rewards/chosen": 2.274462890625,
      "rewards/margins": 5.532470703125,
      "rewards/rejected": -3.2580078125,
      "step": 100
    },
    {
      "epoch": 1.4697986577181208,
      "grad_norm": 8.773207664489746,
      "kl": 0.3812499940395355,
      "learning_rate": 9.872634363932886e-07,
      "logits/chosen": 5904793.6,
      "logits/rejected": 10031923.2,
      "logps/chosen": -401.7,
      "logps/rejected": -434.35,
      "loss": 0.1696,
      "num_unsafe": 0.550000011920929,
      "rewards/chosen": 2.0365234375,
      "rewards/margins": 5.1421875,
      "rewards/rejected": -3.1056640625,
      "step": 110
    },
    {
      "epoch": 1.604026845637584,
      "grad_norm": 2.1352667808532715,
      "kl": 0.725781261920929,
      "learning_rate": 9.440682244067722e-07,
      "logits/chosen": 19757875.2,
      "logits/rejected": 22788505.6,
      "logps/chosen": -301.825,
      "logps/rejected": -432.2,
      "loss": 0.1174,
      "num_unsafe": 0.5,
      "rewards/chosen": 2.513671875,
      "rewards/margins": 6.514453125,
      "rewards/rejected": -4.00078125,
      "step": 120
    },
    {
      "epoch": 1.738255033557047,
      "grad_norm": 3.652601480484009,
      "kl": 0.5015624761581421,
      "learning_rate": 8.729705727120911e-07,
      "logits/chosen": 17581260.8,
      "logits/rejected": 17930649.6,
      "logps/chosen": -337.55,
      "logps/rejected": -427.85,
      "loss": 0.1357,
      "num_unsafe": 0.25,
      "rewards/chosen": 2.494921875,
      "rewards/margins": 6.15390625,
      "rewards/rejected": -3.658984375,
      "step": 130
    },
    {
      "epoch": 1.87248322147651,
      "grad_norm": 2.743739604949951,
      "kl": 1.01171875,
      "learning_rate": 7.78437808244094e-07,
      "logits/chosen": 2695168.0,
      "logits/rejected": -2059059.2,
      "logps/chosen": -287.05,
      "logps/rejected": -410.7,
      "loss": 0.1098,
      "num_unsafe": 0.550000011920929,
      "rewards/chosen": 2.70234375,
      "rewards/margins": 6.565234374999999,
      "rewards/rejected": -3.862890625,
      "step": 140
    },
    {
      "epoch": 2.0,
      "grad_norm": 1.1540242433547974,
      "kl": 0.6759868264198303,
      "learning_rate": 6.664097722614933e-07,
      "logits/chosen": 9126534.736842105,
      "logits/rejected": 11134652.631578946,
      "logps/chosen": -306.94736842105266,
      "logps/rejected": -413.3157894736842,
      "loss": 0.0889,
      "num_unsafe": 0.6315789222717285,
      "rewards/chosen": 3.2284128289473686,
      "rewards/margins": 7.489103618421053,
      "rewards/rejected": -4.260690789473684,
      "step": 150
    },
    {
      "epoch": 2.134228187919463,
      "grad_norm": 3.1307098865509033,
      "kl": 1.0421874523162842,
      "learning_rate": 5.439255982753717e-07,
      "logits/chosen": -1861222.4,
      "logits/rejected": 2562252.8,
      "logps/chosen": -312.7125,
      "logps/rejected": -468.6,
      "loss": 0.0885,
      "num_unsafe": 0.5,
      "rewards/chosen": 3.3125,
      "rewards/margins": 8.0171875,
      "rewards/rejected": -4.7046875,
      "step": 160
    },
    {
      "epoch": 2.2684563758389262,
      "grad_norm": 1.1788336038589478,
      "kl": 0.06875000149011612,
      "learning_rate": 4.1868141740255817e-07,
      "logits/chosen": 9242316.8,
      "logits/rejected": 9269657.6,
      "logps/chosen": -309.7,
      "logps/rejected": -428.95,
      "loss": 0.0865,
      "num_unsafe": 0.699999988079071,
      "rewards/chosen": 3.3330078125,
      "rewards/margins": 7.8982421875,
      "rewards/rejected": -4.565234375,
      "step": 170
    },
    {
      "epoch": 2.402684563758389,
      "grad_norm": 1.8654648065567017,
      "kl": 0.515625,
      "learning_rate": 2.985467821431687e-07,
      "logits/chosen": 11762073.6,
      "logits/rejected": 16133324.8,
      "logps/chosen": -313.075,
      "logps/rejected": -435.2,
      "loss": 0.0958,
      "num_unsafe": 0.6499999761581421,
      "rewards/chosen": 3.34765625,
      "rewards/margins": 8.335546875,
      "rewards/rejected": -4.987890625,
      "step": 180
    },
    {
      "epoch": 2.5369127516778525,
      "grad_norm": 2.7622787952423096,
      "kl": 0.6656249761581421,
      "learning_rate": 1.9107019345483288e-07,
      "logits/chosen": 3596492.8,
      "logits/rejected": 4794982.4,
      "logps/chosen": -345.0,
      "logps/rejected": -441.7,
      "loss": 0.0803,
      "num_unsafe": 0.6000000238418579,
      "rewards/chosen": 3.247265625,
      "rewards/margins": 8.13984375,
      "rewards/rejected": -4.892578125,
      "step": 190
    },
    {
      "epoch": 2.6711409395973154,
      "grad_norm": 1.9276018142700195,
      "kl": 0.22812500596046448,
      "learning_rate": 1.030048006760823e-07,
      "logits/chosen": 22071296.0,
      "logits/rejected": 22795059.2,
      "logps/chosen": -341.225,
      "logps/rejected": -464.8,
      "loss": 0.0984,
      "num_unsafe": 0.25,
      "rewards/chosen": 2.9384765625,
      "rewards/margins": 7.8150390625,
      "rewards/rejected": -4.8765625,
      "step": 200
    },
    {
      "epoch": 2.8053691275167782,
      "grad_norm": 1.9921404123306274,
      "kl": 0.30156248807907104,
      "learning_rate": 3.9884076317064807e-08,
      "logits/chosen": 4773068.8,
      "logits/rejected": 10355916.8,
      "logps/chosen": -291.175,
      "logps/rejected": -402.8,
      "loss": 0.1062,
      "num_unsafe": 0.4000000059604645,
      "rewards/chosen": 2.92861328125,
      "rewards/margins": 7.34970703125,
      "rewards/rejected": -4.42109375,
      "step": 210
    },
    {
      "epoch": 2.9395973154362416,
      "grad_norm": 2.2024121284484863,
      "kl": 0.7749999761581421,
      "learning_rate": 5.674127631043024e-09,
      "logits/chosen": 4405657.6,
      "logits/rejected": -535833.6,
      "logps/chosen": -272.775,
      "logps/rejected": -421.8,
      "loss": 0.0643,
      "num_unsafe": 0.6000000238418579,
      "rewards/chosen": 3.49609375,
      "rewards/margins": 8.56484375,
      "rewards/rejected": -5.06875,
      "step": 220
    }
  ],
  "logging_steps": 10,
  "max_steps": 225,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 300.0,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}