| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.3834586466165413, |
| "eval_steps": 200, |
| "global_step": 900, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.07518796992481203, |
| "grad_norm": 0.00031717625653774825, |
| "learning_rate": 4.906015037593986e-06, |
| "logits/chosen": -3.08984375, |
| "logits/rejected": -3.359375, |
| "logps/chosen": -759.5999755859375, |
| "logps/rejected": -601.7999877929688, |
| "loss": 0.0505, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 3.9212889671325684, |
| "rewards/margins": 19.23046875, |
| "rewards/rejected": -15.315332412719727, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.15037593984962405, |
| "grad_norm": 2.5674383538843824e-13, |
| "learning_rate": 4.81203007518797e-06, |
| "logits/chosen": -3.37890625, |
| "logits/rejected": -3.901562452316284, |
| "logps/chosen": -797.4000244140625, |
| "logps/rejected": -834.2000122070312, |
| "loss": 0.0018, |
| "rewards/accuracies": 0.9984375238418579, |
| "rewards/chosen": 0.5521484613418579, |
| "rewards/margins": 38.98125076293945, |
| "rewards/rejected": -38.412498474121094, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.22556390977443608, |
| "grad_norm": 4.184555188916154e-12, |
| "learning_rate": 4.718045112781955e-06, |
| "logits/chosen": -3.237499952316284, |
| "logits/rejected": -4.165625095367432, |
| "logps/chosen": -774.5999755859375, |
| "logps/rejected": -944.7999877929688, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.255664110183716, |
| "rewards/margins": 53.625, |
| "rewards/rejected": -50.36249923706055, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.3007518796992481, |
| "grad_norm": 3.674299650107461e-07, |
| "learning_rate": 4.62406015037594e-06, |
| "logits/chosen": -3.0601563453674316, |
| "logits/rejected": -4.301562309265137, |
| "logps/chosen": -744.2000122070312, |
| "logps/rejected": -968.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.635937690734863, |
| "rewards/margins": 58.375, |
| "rewards/rejected": -51.724998474121094, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.37593984962406013, |
| "grad_norm": 2.9469468290533765e-13, |
| "learning_rate": 4.530075187969925e-06, |
| "logits/chosen": -3.035937547683716, |
| "logits/rejected": -4.3359375, |
| "logps/chosen": -754.0, |
| "logps/rejected": -981.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.212500095367432, |
| "rewards/margins": 58.912498474121094, |
| "rewards/rejected": -52.724998474121094, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.45112781954887216, |
| "grad_norm": 2.450421719466682e-10, |
| "learning_rate": 4.43609022556391e-06, |
| "logits/chosen": -3.0367188453674316, |
| "logits/rejected": -4.3203125, |
| "logps/chosen": -740.0, |
| "logps/rejected": -961.7999877929688, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.285937309265137, |
| "rewards/margins": 57.474998474121094, |
| "rewards/rejected": -51.1875, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.5263157894736842, |
| "grad_norm": 1.6351303999698253e-10, |
| "learning_rate": 4.342105263157895e-06, |
| "logits/chosen": -3.0234375, |
| "logits/rejected": -4.3046875, |
| "logps/chosen": -754.0, |
| "logps/rejected": -969.4000244140625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.207812309265137, |
| "rewards/margins": 57.662498474121094, |
| "rewards/rejected": -51.412498474121094, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.6015037593984962, |
| "grad_norm": 1.5915083665281199e-09, |
| "learning_rate": 4.24812030075188e-06, |
| "logits/chosen": -3.03125, |
| "logits/rejected": -4.318749904632568, |
| "logps/chosen": -748.2000122070312, |
| "logps/rejected": -958.7999877929688, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.248437404632568, |
| "rewards/margins": 57.525001525878906, |
| "rewards/rejected": -51.25, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.6766917293233082, |
| "grad_norm": 4.297014302694241e-12, |
| "learning_rate": 4.1541353383458646e-06, |
| "logits/chosen": -3.0406250953674316, |
| "logits/rejected": -4.279687404632568, |
| "logps/chosen": -741.4000244140625, |
| "logps/rejected": -961.4000244140625, |
| "loss": 0.0132, |
| "rewards/accuracies": 0.9906250238418579, |
| "rewards/chosen": 6.785937309265137, |
| "rewards/margins": 57.4375, |
| "rewards/rejected": -50.625, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.7518796992481203, |
| "grad_norm": 9.095973454791659e-12, |
| "learning_rate": 4.06015037593985e-06, |
| "logits/chosen": -3.055468797683716, |
| "logits/rejected": -4.318749904632568, |
| "logps/chosen": -733.0, |
| "logps/rejected": -968.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 7.356249809265137, |
| "rewards/margins": 59.5625, |
| "rewards/rejected": -52.224998474121094, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.7518796992481203, |
| "eval_logits/chosen": -3.058178186416626, |
| "eval_logits/rejected": -4.299867153167725, |
| "eval_logps/chosen": -741.7021484375, |
| "eval_logps/rejected": -972.0850830078125, |
| "eval_loss": 2.180002622864663e-09, |
| "eval_rewards/accuracies": 1.0, |
| "eval_rewards/chosen": 7.262632846832275, |
| "eval_rewards/margins": 58.87765884399414, |
| "eval_rewards/rejected": -51.62765884399414, |
| "eval_runtime": 8.4937, |
| "eval_samples_per_second": 176.601, |
| "eval_score": -0.6606304049491882, |
| "eval_steps_per_second": 5.533, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.8270676691729323, |
| "grad_norm": 2.3694597465927517e-14, |
| "learning_rate": 3.966165413533835e-06, |
| "logits/chosen": -3.057812452316284, |
| "logits/rejected": -4.2890625, |
| "logps/chosen": -739.0, |
| "logps/rejected": -973.4000244140625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 7.339062690734863, |
| "rewards/margins": 58.67499923706055, |
| "rewards/rejected": -51.32500076293945, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.9022556390977443, |
| "grad_norm": 4.7239515256796625e-09, |
| "learning_rate": 3.87218045112782e-06, |
| "logits/chosen": -3.0648436546325684, |
| "logits/rejected": -4.301562309265137, |
| "logps/chosen": -739.4000244140625, |
| "logps/rejected": -980.4000244140625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 7.296875, |
| "rewards/margins": 59.57500076293945, |
| "rewards/rejected": -52.25, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.9774436090225563, |
| "grad_norm": 1.1010824468107971e-09, |
| "learning_rate": 3.778195488721805e-06, |
| "logits/chosen": -3.059375047683716, |
| "logits/rejected": -4.317187309265137, |
| "logps/chosen": -730.0, |
| "logps/rejected": -975.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 7.485937595367432, |
| "rewards/margins": 60.162498474121094, |
| "rewards/rejected": -52.67499923706055, |
| "step": 260 |
| }, |
| { |
| "epoch": 1.0526315789473684, |
| "grad_norm": 5.510844973332184e-09, |
| "learning_rate": 3.6842105263157896e-06, |
| "logits/chosen": -3.06640625, |
| "logits/rejected": -4.3046875, |
| "logps/chosen": -736.4000244140625, |
| "logps/rejected": -961.5999755859375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 7.443749904632568, |
| "rewards/margins": 58.42499923706055, |
| "rewards/rejected": -51.025001525878906, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.1278195488721805, |
| "grad_norm": 1.0805968862929627e-08, |
| "learning_rate": 3.590225563909775e-06, |
| "logits/chosen": -3.067187547683716, |
| "logits/rejected": -4.329687595367432, |
| "logps/chosen": -737.0, |
| "logps/rejected": -978.2000122070312, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 7.373437404632568, |
| "rewards/margins": 59.849998474121094, |
| "rewards/rejected": -52.474998474121094, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.2030075187969924, |
| "grad_norm": 1.8280615029499892e-13, |
| "learning_rate": 3.4962406015037596e-06, |
| "logits/chosen": -3.063281297683716, |
| "logits/rejected": -4.317187309265137, |
| "logps/chosen": -739.2000122070312, |
| "logps/rejected": -977.2000122070312, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 7.349999904632568, |
| "rewards/margins": 59.67499923706055, |
| "rewards/rejected": -52.337501525878906, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.2781954887218046, |
| "grad_norm": 8.685091807936255e-12, |
| "learning_rate": 3.4022556390977448e-06, |
| "logits/chosen": -3.057812452316284, |
| "logits/rejected": -4.293749809265137, |
| "logps/chosen": -740.4000244140625, |
| "logps/rejected": -961.2000122070312, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 7.3203125, |
| "rewards/margins": 58.587501525878906, |
| "rewards/rejected": -51.25, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.3533834586466165, |
| "grad_norm": 1.6244164681709312e-07, |
| "learning_rate": 3.3082706766917295e-06, |
| "logits/chosen": -3.059375047683716, |
| "logits/rejected": -4.295312404632568, |
| "logps/chosen": -738.2000122070312, |
| "logps/rejected": -958.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 7.278124809265137, |
| "rewards/margins": 58.337501525878906, |
| "rewards/rejected": -51.04999923706055, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.4285714285714286, |
| "grad_norm": 3.8272192733826815e-13, |
| "learning_rate": 3.2142857142857147e-06, |
| "logits/chosen": -3.0648436546325684, |
| "logits/rejected": -4.318749904632568, |
| "logps/chosen": -731.0, |
| "logps/rejected": -984.4000244140625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 7.357812404632568, |
| "rewards/margins": 60.38750076293945, |
| "rewards/rejected": -53.04999923706055, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.5037593984962405, |
| "grad_norm": 6.450130800887369e-09, |
| "learning_rate": 3.1203007518796995e-06, |
| "logits/chosen": -3.0570311546325684, |
| "logits/rejected": -4.317187309265137, |
| "logps/chosen": -735.5999755859375, |
| "logps/rejected": -965.5999755859375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 7.471875190734863, |
| "rewards/margins": 59.13750076293945, |
| "rewards/rejected": -51.650001525878906, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.5037593984962405, |
| "eval_logits/chosen": -3.0611701011657715, |
| "eval_logits/rejected": -4.303191661834717, |
| "eval_logps/chosen": -741.872314453125, |
| "eval_logps/rejected": -972.85107421875, |
| "eval_loss": 1.5668466524232372e-09, |
| "eval_rewards/accuracies": 1.0, |
| "eval_rewards/chosen": 7.266622543334961, |
| "eval_rewards/margins": 58.98404312133789, |
| "eval_rewards/rejected": -51.70744705200195, |
| "eval_runtime": 8.4851, |
| "eval_samples_per_second": 176.78, |
| "eval_score": -0.6956531405448914, |
| "eval_steps_per_second": 5.539, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.5789473684210527, |
| "grad_norm": 1.5739247094468196e-10, |
| "learning_rate": 3.0263157894736843e-06, |
| "logits/chosen": -3.063281297683716, |
| "logits/rejected": -4.301562309265137, |
| "logps/chosen": -732.5999755859375, |
| "logps/rejected": -961.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 7.067187309265137, |
| "rewards/margins": 58.025001525878906, |
| "rewards/rejected": -50.95000076293945, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.6541353383458648, |
| "grad_norm": 5.037244548596992e-09, |
| "learning_rate": 2.9323308270676694e-06, |
| "logits/chosen": -3.0679688453674316, |
| "logits/rejected": -4.328125, |
| "logps/chosen": -735.5999755859375, |
| "logps/rejected": -983.5999755859375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 7.542187690734863, |
| "rewards/margins": 60.11249923706055, |
| "rewards/rejected": -52.54999923706055, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.7293233082706767, |
| "grad_norm": 1.907719763871417e-13, |
| "learning_rate": 2.8383458646616546e-06, |
| "logits/chosen": -3.065624952316284, |
| "logits/rejected": -4.318749904632568, |
| "logps/chosen": -732.0, |
| "logps/rejected": -971.4000244140625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 7.348437309265137, |
| "rewards/margins": 59.599998474121094, |
| "rewards/rejected": -52.275001525878906, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.8045112781954886, |
| "grad_norm": 2.413237608760496e-12, |
| "learning_rate": 2.7443609022556394e-06, |
| "logits/chosen": -3.06640625, |
| "logits/rejected": -4.303124904632568, |
| "logps/chosen": -734.0, |
| "logps/rejected": -962.2000122070312, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 7.337500095367432, |
| "rewards/margins": 58.900001525878906, |
| "rewards/rejected": -51.54999923706055, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.8796992481203008, |
| "grad_norm": 3.8499016698807266e-05, |
| "learning_rate": 2.650375939849624e-06, |
| "logits/chosen": -3.063281297683716, |
| "logits/rejected": -4.34375, |
| "logps/chosen": -741.0, |
| "logps/rejected": -975.4000244140625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 7.368750095367432, |
| "rewards/margins": 60.42499923706055, |
| "rewards/rejected": -53.0625, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.954887218045113, |
| "grad_norm": 1.6477178899318827e-11, |
| "learning_rate": 2.556390977443609e-06, |
| "logits/chosen": -3.051562547683716, |
| "logits/rejected": -4.314062595367432, |
| "logps/chosen": -734.2000122070312, |
| "logps/rejected": -970.4000244140625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 7.464062690734863, |
| "rewards/margins": 59.70000076293945, |
| "rewards/rejected": -52.224998474121094, |
| "step": 520 |
| }, |
| { |
| "epoch": 2.030075187969925, |
| "grad_norm": 2.0829162250893226e-08, |
| "learning_rate": 2.462406015037594e-06, |
| "logits/chosen": -3.022656202316284, |
| "logits/rejected": -4.27734375, |
| "logps/chosen": -713.4000244140625, |
| "logps/rejected": -979.4000244140625, |
| "loss": 0.0045, |
| "rewards/accuracies": 0.995312511920929, |
| "rewards/chosen": 8.104687690734863, |
| "rewards/margins": 60.17499923706055, |
| "rewards/rejected": -52.0625, |
| "step": 540 |
| }, |
| { |
| "epoch": 2.1052631578947367, |
| "grad_norm": 1.0373247696775237e-09, |
| "learning_rate": 2.368421052631579e-06, |
| "logits/chosen": -2.9625000953674316, |
| "logits/rejected": -4.248437404632568, |
| "logps/chosen": -729.2000122070312, |
| "logps/rejected": -967.4000244140625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 8.295312881469727, |
| "rewards/margins": 59.724998474121094, |
| "rewards/rejected": -51.42499923706055, |
| "step": 560 |
| }, |
| { |
| "epoch": 2.180451127819549, |
| "grad_norm": 1.0771024319146517e-10, |
| "learning_rate": 2.274436090225564e-06, |
| "logits/chosen": -2.964062452316284, |
| "logits/rejected": -4.268750190734863, |
| "logps/chosen": -727.2000122070312, |
| "logps/rejected": -964.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 7.871874809265137, |
| "rewards/margins": 59.3125, |
| "rewards/rejected": -51.42499923706055, |
| "step": 580 |
| }, |
| { |
| "epoch": 2.255639097744361, |
| "grad_norm": 3.252682975051824e-07, |
| "learning_rate": 2.180451127819549e-06, |
| "logits/chosen": -2.9429688453674316, |
| "logits/rejected": -4.2734375, |
| "logps/chosen": -725.0, |
| "logps/rejected": -963.5999755859375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 7.995312690734863, |
| "rewards/margins": 59.537498474121094, |
| "rewards/rejected": -51.537498474121094, |
| "step": 600 |
| }, |
| { |
| "epoch": 2.255639097744361, |
| "eval_logits/chosen": -2.953125, |
| "eval_logits/rejected": -4.254654407501221, |
| "eval_logps/chosen": -734.9786987304688, |
| "eval_logps/rejected": -968.7659301757812, |
| "eval_loss": 3.946915239083637e-09, |
| "eval_rewards/accuracies": 1.0, |
| "eval_rewards/chosen": 7.918882846832275, |
| "eval_rewards/margins": 59.25, |
| "eval_rewards/rejected": -51.32978820800781, |
| "eval_runtime": 8.4849, |
| "eval_samples_per_second": 176.784, |
| "eval_score": -0.8367462158203125, |
| "eval_steps_per_second": 5.539, |
| "step": 600 |
| }, |
| { |
| "epoch": 2.3308270676691727, |
| "grad_norm": 1.0373911797620403e-10, |
| "learning_rate": 2.086466165413534e-06, |
| "logits/chosen": -2.9546875953674316, |
| "logits/rejected": -4.296875, |
| "logps/chosen": -730.0, |
| "logps/rejected": -982.2000122070312, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 7.956250190734863, |
| "rewards/margins": 60.900001525878906, |
| "rewards/rejected": -52.92499923706055, |
| "step": 620 |
| }, |
| { |
| "epoch": 2.406015037593985, |
| "grad_norm": 8.524041237745346e-09, |
| "learning_rate": 1.9924812030075188e-06, |
| "logits/chosen": -2.9507813453674316, |
| "logits/rejected": -4.275000095367432, |
| "logps/chosen": -725.5999755859375, |
| "logps/rejected": -961.5999755859375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 8.017187118530273, |
| "rewards/margins": 59.662498474121094, |
| "rewards/rejected": -51.63750076293945, |
| "step": 640 |
| }, |
| { |
| "epoch": 2.481203007518797, |
| "grad_norm": 1.7859258054396646e-12, |
| "learning_rate": 1.898496240601504e-06, |
| "logits/chosen": -2.953906297683716, |
| "logits/rejected": -4.225781440734863, |
| "logps/chosen": -725.0, |
| "logps/rejected": -964.7999877929688, |
| "loss": 0.0037, |
| "rewards/accuracies": 0.996874988079071, |
| "rewards/chosen": 7.964062690734863, |
| "rewards/margins": 58.587501525878906, |
| "rewards/rejected": -50.599998474121094, |
| "step": 660 |
| }, |
| { |
| "epoch": 2.556390977443609, |
| "grad_norm": 1.0660900262877694e-07, |
| "learning_rate": 1.8045112781954887e-06, |
| "logits/chosen": -2.977343797683716, |
| "logits/rejected": -4.276562690734863, |
| "logps/chosen": -729.4000244140625, |
| "logps/rejected": -979.5999755859375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 8.3203125, |
| "rewards/margins": 60.9375, |
| "rewards/rejected": -52.61249923706055, |
| "step": 680 |
| }, |
| { |
| "epoch": 2.6315789473684212, |
| "grad_norm": 1.4321429190376608e-11, |
| "learning_rate": 1.710526315789474e-06, |
| "logits/chosen": -2.97265625, |
| "logits/rejected": -4.278124809265137, |
| "logps/chosen": -723.4000244140625, |
| "logps/rejected": -970.4000244140625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 8.307812690734863, |
| "rewards/margins": 60.48749923706055, |
| "rewards/rejected": -52.162498474121094, |
| "step": 700 |
| }, |
| { |
| "epoch": 2.706766917293233, |
| "grad_norm": 1.3597127845174747e-07, |
| "learning_rate": 1.6165413533834587e-06, |
| "logits/chosen": -2.9710936546325684, |
| "logits/rejected": -4.296875, |
| "logps/chosen": -730.7999877929688, |
| "logps/rejected": -986.5999755859375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 8.112500190734863, |
| "rewards/margins": 61.724998474121094, |
| "rewards/rejected": -53.599998474121094, |
| "step": 720 |
| }, |
| { |
| "epoch": 2.781954887218045, |
| "grad_norm": 2.181569008354852e-13, |
| "learning_rate": 1.5225563909774439e-06, |
| "logits/chosen": -2.9749999046325684, |
| "logits/rejected": -4.271874904632568, |
| "logps/chosen": -721.0, |
| "logps/rejected": -971.5999755859375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 8.370312690734863, |
| "rewards/margins": 60.67499923706055, |
| "rewards/rejected": -52.32500076293945, |
| "step": 740 |
| }, |
| { |
| "epoch": 2.857142857142857, |
| "grad_norm": 1.4223787675268966e-11, |
| "learning_rate": 1.4285714285714286e-06, |
| "logits/chosen": -2.979687452316284, |
| "logits/rejected": -4.292187690734863, |
| "logps/chosen": -735.5999755859375, |
| "logps/rejected": -979.2000122070312, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 8.193750381469727, |
| "rewards/margins": 61.3125, |
| "rewards/rejected": -53.125, |
| "step": 760 |
| }, |
| { |
| "epoch": 2.932330827067669, |
| "grad_norm": 6.992803813674245e-08, |
| "learning_rate": 1.3345864661654136e-06, |
| "logits/chosen": -2.973437547683716, |
| "logits/rejected": -4.25, |
| "logps/chosen": -724.0, |
| "logps/rejected": -967.2000122070312, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 8.217187881469727, |
| "rewards/margins": 59.900001525878906, |
| "rewards/rejected": -51.6875, |
| "step": 780 |
| }, |
| { |
| "epoch": 3.007518796992481, |
| "grad_norm": 1.660423123855119e-08, |
| "learning_rate": 1.2406015037593986e-06, |
| "logits/chosen": -2.975781202316284, |
| "logits/rejected": -4.293749809265137, |
| "logps/chosen": -731.7999877929688, |
| "logps/rejected": -990.2000122070312, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 8.190625190734863, |
| "rewards/margins": 61.974998474121094, |
| "rewards/rejected": -53.76250076293945, |
| "step": 800 |
| }, |
| { |
| "epoch": 3.007518796992481, |
| "eval_logits/chosen": -2.9724068641662598, |
| "eval_logits/rejected": -4.260638236999512, |
| "eval_logps/chosen": -733.4468383789062, |
| "eval_logps/rejected": -979.574462890625, |
| "eval_loss": 1.755893541677267e-09, |
| "eval_rewards/accuracies": 1.0, |
| "eval_rewards/chosen": 8.110372543334961, |
| "eval_rewards/margins": 60.537235260009766, |
| "eval_rewards/rejected": -52.441490173339844, |
| "eval_runtime": 8.4913, |
| "eval_samples_per_second": 176.652, |
| "eval_score": -0.7882758378982544, |
| "eval_steps_per_second": 5.535, |
| "step": 800 |
| }, |
| { |
| "epoch": 3.082706766917293, |
| "grad_norm": 8.005659023834536e-12, |
| "learning_rate": 1.1466165413533836e-06, |
| "logits/chosen": -2.9710936546325684, |
| "logits/rejected": -4.2890625, |
| "logps/chosen": -731.2000122070312, |
| "logps/rejected": -993.2000122070312, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 8.2578125, |
| "rewards/margins": 62.13750076293945, |
| "rewards/rejected": -53.900001525878906, |
| "step": 820 |
| }, |
| { |
| "epoch": 3.1578947368421053, |
| "grad_norm": 5.843963868032614e-09, |
| "learning_rate": 1.0526315789473685e-06, |
| "logits/chosen": -2.975781202316284, |
| "logits/rejected": -4.279687404632568, |
| "logps/chosen": -723.7999877929688, |
| "logps/rejected": -973.4000244140625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 8.0703125, |
| "rewards/margins": 60.662498474121094, |
| "rewards/rejected": -52.57500076293945, |
| "step": 840 |
| }, |
| { |
| "epoch": 3.2330827067669174, |
| "grad_norm": 1.4081920596327731e-08, |
| "learning_rate": 9.586466165413535e-07, |
| "logits/chosen": -2.969531297683716, |
| "logits/rejected": -4.287499904632568, |
| "logps/chosen": -732.4000244140625, |
| "logps/rejected": -984.7999877929688, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 8.153124809265137, |
| "rewards/margins": 61.599998474121094, |
| "rewards/rejected": -53.45000076293945, |
| "step": 860 |
| }, |
| { |
| "epoch": 3.308270676691729, |
| "grad_norm": 6.352507576934744e-09, |
| "learning_rate": 8.646616541353384e-07, |
| "logits/chosen": -2.9742188453674316, |
| "logits/rejected": -4.279687404632568, |
| "logps/chosen": -727.7999877929688, |
| "logps/rejected": -974.5999755859375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 8.331250190734863, |
| "rewards/margins": 60.92499923706055, |
| "rewards/rejected": -52.650001525878906, |
| "step": 880 |
| }, |
| { |
| "epoch": 3.3834586466165413, |
| "grad_norm": 1.3822216777037244e-07, |
| "learning_rate": 7.706766917293233e-07, |
| "logits/chosen": -2.9828124046325684, |
| "logits/rejected": -4.2734375, |
| "logps/chosen": -725.4000244140625, |
| "logps/rejected": -971.2000122070312, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 8.231249809265137, |
| "rewards/margins": 60.712501525878906, |
| "rewards/rejected": -52.5, |
| "step": 900 |
| } |
| ], |
| "logging_steps": 20, |
| "max_steps": 1064, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 4, |
| "save_steps": 300, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|