{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 2.255639097744361,
  "eval_steps": 200,
  "global_step": 600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.07518796992481203,
      "grad_norm": 0.00031717625653774825,
      "learning_rate": 4.906015037593986e-06,
      "logits/chosen": -3.08984375,
      "logits/rejected": -3.359375,
      "logps/chosen": -759.5999755859375,
      "logps/rejected": -601.7999877929688,
      "loss": 0.0505,
      "rewards/accuracies": 0.949999988079071,
      "rewards/chosen": 3.9212889671325684,
      "rewards/margins": 19.23046875,
      "rewards/rejected": -15.315332412719727,
      "step": 20
    },
    {
      "epoch": 0.15037593984962405,
      "grad_norm": 2.5674383538843824e-13,
      "learning_rate": 4.81203007518797e-06,
      "logits/chosen": -3.37890625,
      "logits/rejected": -3.901562452316284,
      "logps/chosen": -797.4000244140625,
      "logps/rejected": -834.2000122070312,
      "loss": 0.0018,
      "rewards/accuracies": 0.9984375238418579,
      "rewards/chosen": 0.5521484613418579,
      "rewards/margins": 38.98125076293945,
      "rewards/rejected": -38.412498474121094,
      "step": 40
    },
    {
      "epoch": 0.22556390977443608,
      "grad_norm": 4.184555188916154e-12,
      "learning_rate": 4.718045112781955e-06,
      "logits/chosen": -3.237499952316284,
      "logits/rejected": -4.165625095367432,
      "logps/chosen": -774.5999755859375,
      "logps/rejected": -944.7999877929688,
      "loss": 0.0001,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 3.255664110183716,
      "rewards/margins": 53.625,
      "rewards/rejected": -50.36249923706055,
      "step": 60
    },
    {
      "epoch": 0.3007518796992481,
      "grad_norm": 3.674299650107461e-07,
      "learning_rate": 4.62406015037594e-06,
      "logits/chosen": -3.0601563453674316,
      "logits/rejected": -4.301562309265137,
      "logps/chosen": -744.2000122070312,
      "logps/rejected": -968.0,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 6.635937690734863,
      "rewards/margins": 58.375,
      "rewards/rejected": -51.724998474121094,
      "step": 80
    },
    {
      "epoch": 0.37593984962406013,
      "grad_norm": 2.9469468290533765e-13,
      "learning_rate": 4.530075187969925e-06,
      "logits/chosen": -3.035937547683716,
      "logits/rejected": -4.3359375,
      "logps/chosen": -754.0,
      "logps/rejected": -981.0,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 6.212500095367432,
      "rewards/margins": 58.912498474121094,
      "rewards/rejected": -52.724998474121094,
      "step": 100
    },
    {
      "epoch": 0.45112781954887216,
      "grad_norm": 2.450421719466682e-10,
      "learning_rate": 4.43609022556391e-06,
      "logits/chosen": -3.0367188453674316,
      "logits/rejected": -4.3203125,
      "logps/chosen": -740.0,
      "logps/rejected": -961.7999877929688,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 6.285937309265137,
      "rewards/margins": 57.474998474121094,
      "rewards/rejected": -51.1875,
      "step": 120
    },
    {
      "epoch": 0.5263157894736842,
      "grad_norm": 1.6351303999698253e-10,
      "learning_rate": 4.342105263157895e-06,
      "logits/chosen": -3.0234375,
      "logits/rejected": -4.3046875,
      "logps/chosen": -754.0,
      "logps/rejected": -969.4000244140625,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 6.207812309265137,
      "rewards/margins": 57.662498474121094,
      "rewards/rejected": -51.412498474121094,
      "step": 140
    },
    {
      "epoch": 0.6015037593984962,
      "grad_norm": 1.5915083665281199e-09,
      "learning_rate": 4.24812030075188e-06,
      "logits/chosen": -3.03125,
      "logits/rejected": -4.318749904632568,
      "logps/chosen": -748.2000122070312,
      "logps/rejected": -958.7999877929688,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 6.248437404632568,
      "rewards/margins": 57.525001525878906,
      "rewards/rejected": -51.25,
      "step": 160
    },
    {
      "epoch": 0.6766917293233082,
      "grad_norm": 4.297014302694241e-12,
      "learning_rate": 4.1541353383458646e-06,
      "logits/chosen": -3.0406250953674316,
      "logits/rejected": -4.279687404632568,
      "logps/chosen": -741.4000244140625,
      "logps/rejected": -961.4000244140625,
      "loss": 0.0132,
      "rewards/accuracies": 0.9906250238418579,
      "rewards/chosen": 6.785937309265137,
      "rewards/margins": 57.4375,
      "rewards/rejected": -50.625,
      "step": 180
    },
    {
      "epoch": 0.7518796992481203,
      "grad_norm": 9.095973454791659e-12,
      "learning_rate": 4.06015037593985e-06,
      "logits/chosen": -3.055468797683716,
      "logits/rejected": -4.318749904632568,
      "logps/chosen": -733.0,
      "logps/rejected": -968.0,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 7.356249809265137,
      "rewards/margins": 59.5625,
      "rewards/rejected": -52.224998474121094,
      "step": 200
    },
    {
      "epoch": 0.7518796992481203,
      "eval_logits/chosen": -3.058178186416626,
      "eval_logits/rejected": -4.299867153167725,
      "eval_logps/chosen": -741.7021484375,
      "eval_logps/rejected": -972.0850830078125,
      "eval_loss": 2.180002622864663e-09,
      "eval_rewards/accuracies": 1.0,
      "eval_rewards/chosen": 7.262632846832275,
      "eval_rewards/margins": 58.87765884399414,
      "eval_rewards/rejected": -51.62765884399414,
      "eval_runtime": 8.4937,
      "eval_samples_per_second": 176.601,
      "eval_score": -0.6606304049491882,
      "eval_steps_per_second": 5.533,
      "step": 200
    },
    {
      "epoch": 0.8270676691729323,
      "grad_norm": 2.3694597465927517e-14,
      "learning_rate": 3.966165413533835e-06,
      "logits/chosen": -3.057812452316284,
      "logits/rejected": -4.2890625,
      "logps/chosen": -739.0,
      "logps/rejected": -973.4000244140625,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 7.339062690734863,
      "rewards/margins": 58.67499923706055,
      "rewards/rejected": -51.32500076293945,
      "step": 220
    },
    {
      "epoch": 0.9022556390977443,
      "grad_norm": 4.7239515256796625e-09,
      "learning_rate": 3.87218045112782e-06,
      "logits/chosen": -3.0648436546325684,
      "logits/rejected": -4.301562309265137,
      "logps/chosen": -739.4000244140625,
      "logps/rejected": -980.4000244140625,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 7.296875,
      "rewards/margins": 59.57500076293945,
      "rewards/rejected": -52.25,
      "step": 240
    },
    {
      "epoch": 0.9774436090225563,
      "grad_norm": 1.1010824468107971e-09,
      "learning_rate": 3.778195488721805e-06,
      "logits/chosen": -3.059375047683716,
      "logits/rejected": -4.317187309265137,
      "logps/chosen": -730.0,
      "logps/rejected": -975.0,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 7.485937595367432,
      "rewards/margins": 60.162498474121094,
      "rewards/rejected": -52.67499923706055,
      "step": 260
    },
    {
      "epoch": 1.0526315789473684,
      "grad_norm": 5.510844973332184e-09,
      "learning_rate": 3.6842105263157896e-06,
      "logits/chosen": -3.06640625,
      "logits/rejected": -4.3046875,
      "logps/chosen": -736.4000244140625,
      "logps/rejected": -961.5999755859375,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 7.443749904632568,
      "rewards/margins": 58.42499923706055,
      "rewards/rejected": -51.025001525878906,
      "step": 280
    },
    {
      "epoch": 1.1278195488721805,
      "grad_norm": 1.0805968862929627e-08,
      "learning_rate": 3.590225563909775e-06,
      "logits/chosen": -3.067187547683716,
      "logits/rejected": -4.329687595367432,
      "logps/chosen": -737.0,
      "logps/rejected": -978.2000122070312,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 7.373437404632568,
      "rewards/margins": 59.849998474121094,
      "rewards/rejected": -52.474998474121094,
      "step": 300
    },
    {
      "epoch": 1.2030075187969924,
      "grad_norm": 1.8280615029499892e-13,
      "learning_rate": 3.4962406015037596e-06,
      "logits/chosen": -3.063281297683716,
      "logits/rejected": -4.317187309265137,
      "logps/chosen": -739.2000122070312,
      "logps/rejected": -977.2000122070312,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 7.349999904632568,
      "rewards/margins": 59.67499923706055,
      "rewards/rejected": -52.337501525878906,
      "step": 320
    },
    {
      "epoch": 1.2781954887218046,
      "grad_norm": 8.685091807936255e-12,
      "learning_rate": 3.4022556390977448e-06,
      "logits/chosen": -3.057812452316284,
      "logits/rejected": -4.293749809265137,
      "logps/chosen": -740.4000244140625,
      "logps/rejected": -961.2000122070312,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 7.3203125,
      "rewards/margins": 58.587501525878906,
      "rewards/rejected": -51.25,
      "step": 340
    },
    {
      "epoch": 1.3533834586466165,
      "grad_norm": 1.6244164681709312e-07,
      "learning_rate": 3.3082706766917295e-06,
      "logits/chosen": -3.059375047683716,
      "logits/rejected": -4.295312404632568,
      "logps/chosen": -738.2000122070312,
      "logps/rejected": -958.0,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 7.278124809265137,
      "rewards/margins": 58.337501525878906,
      "rewards/rejected": -51.04999923706055,
      "step": 360
    },
    {
      "epoch": 1.4285714285714286,
      "grad_norm": 3.8272192733826815e-13,
      "learning_rate": 3.2142857142857147e-06,
      "logits/chosen": -3.0648436546325684,
      "logits/rejected": -4.318749904632568,
      "logps/chosen": -731.0,
      "logps/rejected": -984.4000244140625,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 7.357812404632568,
      "rewards/margins": 60.38750076293945,
      "rewards/rejected": -53.04999923706055,
      "step": 380
    },
    {
      "epoch": 1.5037593984962405,
      "grad_norm": 6.450130800887369e-09,
      "learning_rate": 3.1203007518796995e-06,
      "logits/chosen": -3.0570311546325684,
      "logits/rejected": -4.317187309265137,
      "logps/chosen": -735.5999755859375,
      "logps/rejected": -965.5999755859375,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 7.471875190734863,
      "rewards/margins": 59.13750076293945,
      "rewards/rejected": -51.650001525878906,
      "step": 400
    },
    {
      "epoch": 1.5037593984962405,
      "eval_logits/chosen": -3.0611701011657715,
      "eval_logits/rejected": -4.303191661834717,
      "eval_logps/chosen": -741.872314453125,
      "eval_logps/rejected": -972.85107421875,
      "eval_loss": 1.5668466524232372e-09,
      "eval_rewards/accuracies": 1.0,
      "eval_rewards/chosen": 7.266622543334961,
      "eval_rewards/margins": 58.98404312133789,
      "eval_rewards/rejected": -51.70744705200195,
      "eval_runtime": 8.4851,
      "eval_samples_per_second": 176.78,
      "eval_score": -0.6956531405448914,
      "eval_steps_per_second": 5.539,
      "step": 400
    },
    {
      "epoch": 1.5789473684210527,
      "grad_norm": 1.5739247094468196e-10,
      "learning_rate": 3.0263157894736843e-06,
      "logits/chosen": -3.063281297683716,
      "logits/rejected": -4.301562309265137,
      "logps/chosen": -732.5999755859375,
      "logps/rejected": -961.0,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 7.067187309265137,
      "rewards/margins": 58.025001525878906,
      "rewards/rejected": -50.95000076293945,
      "step": 420
    },
    {
      "epoch": 1.6541353383458648,
      "grad_norm": 5.037244548596992e-09,
      "learning_rate": 2.9323308270676694e-06,
      "logits/chosen": -3.0679688453674316,
      "logits/rejected": -4.328125,
      "logps/chosen": -735.5999755859375,
      "logps/rejected": -983.5999755859375,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 7.542187690734863,
      "rewards/margins": 60.11249923706055,
      "rewards/rejected": -52.54999923706055,
      "step": 440
    },
    {
      "epoch": 1.7293233082706767,
      "grad_norm": 1.907719763871417e-13,
      "learning_rate": 2.8383458646616546e-06,
      "logits/chosen": -3.065624952316284,
      "logits/rejected": -4.318749904632568,
      "logps/chosen": -732.0,
      "logps/rejected": -971.4000244140625,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 7.348437309265137,
      "rewards/margins": 59.599998474121094,
      "rewards/rejected": -52.275001525878906,
      "step": 460
    },
    {
      "epoch": 1.8045112781954886,
      "grad_norm": 2.413237608760496e-12,
      "learning_rate": 2.7443609022556394e-06,
      "logits/chosen": -3.06640625,
      "logits/rejected": -4.303124904632568,
      "logps/chosen": -734.0,
      "logps/rejected": -962.2000122070312,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 7.337500095367432,
      "rewards/margins": 58.900001525878906,
      "rewards/rejected": -51.54999923706055,
      "step": 480
    },
    {
      "epoch": 1.8796992481203008,
      "grad_norm": 3.8499016698807266e-05,
      "learning_rate": 2.650375939849624e-06,
      "logits/chosen": -3.063281297683716,
      "logits/rejected": -4.34375,
      "logps/chosen": -741.0,
      "logps/rejected": -975.4000244140625,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 7.368750095367432,
      "rewards/margins": 60.42499923706055,
      "rewards/rejected": -53.0625,
      "step": 500
    },
    {
      "epoch": 1.954887218045113,
      "grad_norm": 1.6477178899318827e-11,
      "learning_rate": 2.556390977443609e-06,
      "logits/chosen": -3.051562547683716,
      "logits/rejected": -4.314062595367432,
      "logps/chosen": -734.2000122070312,
      "logps/rejected": -970.4000244140625,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 7.464062690734863,
      "rewards/margins": 59.70000076293945,
      "rewards/rejected": -52.224998474121094,
      "step": 520
    },
    {
      "epoch": 2.030075187969925,
      "grad_norm": 2.0829162250893226e-08,
      "learning_rate": 2.462406015037594e-06,
      "logits/chosen": -3.022656202316284,
      "logits/rejected": -4.27734375,
      "logps/chosen": -713.4000244140625,
      "logps/rejected": -979.4000244140625,
      "loss": 0.0045,
      "rewards/accuracies": 0.995312511920929,
      "rewards/chosen": 8.104687690734863,
      "rewards/margins": 60.17499923706055,
      "rewards/rejected": -52.0625,
      "step": 540
    },
    {
      "epoch": 2.1052631578947367,
      "grad_norm": 1.0373247696775237e-09,
      "learning_rate": 2.368421052631579e-06,
      "logits/chosen": -2.9625000953674316,
      "logits/rejected": -4.248437404632568,
      "logps/chosen": -729.2000122070312,
      "logps/rejected": -967.4000244140625,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 8.295312881469727,
      "rewards/margins": 59.724998474121094,
      "rewards/rejected": -51.42499923706055,
      "step": 560
    },
    {
      "epoch": 2.180451127819549,
      "grad_norm": 1.0771024319146517e-10,
      "learning_rate": 2.274436090225564e-06,
      "logits/chosen": -2.964062452316284,
      "logits/rejected": -4.268750190734863,
      "logps/chosen": -727.2000122070312,
      "logps/rejected": -964.0,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 7.871874809265137,
      "rewards/margins": 59.3125,
      "rewards/rejected": -51.42499923706055,
      "step": 580
    },
    {
      "epoch": 2.255639097744361,
      "grad_norm": 3.252682975051824e-07,
      "learning_rate": 2.180451127819549e-06,
      "logits/chosen": -2.9429688453674316,
      "logits/rejected": -4.2734375,
      "logps/chosen": -725.0,
      "logps/rejected": -963.5999755859375,
      "loss": 0.0,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 7.995312690734863,
      "rewards/margins": 59.537498474121094,
      "rewards/rejected": -51.537498474121094,
      "step": 600
    },
    {
      "epoch": 2.255639097744361,
      "eval_logits/chosen": -2.953125,
      "eval_logits/rejected": -4.254654407501221,
      "eval_logps/chosen": -734.9786987304688,
      "eval_logps/rejected": -968.7659301757812,
      "eval_loss": 3.946915239083637e-09,
      "eval_rewards/accuracies": 1.0,
      "eval_rewards/chosen": 7.918882846832275,
      "eval_rewards/margins": 59.25,
      "eval_rewards/rejected": -51.32978820800781,
      "eval_runtime": 8.4849,
      "eval_samples_per_second": 176.784,
      "eval_score": -0.8367462158203125,
      "eval_steps_per_second": 5.539,
      "step": 600
    }
  ],
  "logging_steps": 20,
  "max_steps": 1064,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 300,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}