| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 400, | |
| "global_step": 468, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.010683760683760684, | |
| "grad_norm": 18.387579925622582, | |
| "learning_rate": 8.51063829787234e-08, | |
| "logits/chosen": -1.4812500476837158, | |
| "logits/rejected": -1.4343750476837158, | |
| "logps/chosen": -0.81640625, | |
| "logps/rejected": -0.850390613079071, | |
| "loss": 1.7676, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.4671874940395355, | |
| "rewards/chosen": -2.0406250953674316, | |
| "rewards/margins": 0.08261718600988388, | |
| "rewards/rejected": -2.1226563453674316, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.021367521367521368, | |
| "grad_norm": 21.659560893283086, | |
| "learning_rate": 1.9148936170212765e-07, | |
| "logits/chosen": -1.43359375, | |
| "logits/rejected": -1.396875023841858, | |
| "logps/chosen": -0.7269531488418579, | |
| "logps/rejected": -0.7250000238418579, | |
| "loss": 1.7614, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.48124998807907104, | |
| "rewards/chosen": -1.818750023841858, | |
| "rewards/margins": -0.007250976748764515, | |
| "rewards/rejected": -1.810937523841858, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.03205128205128205, | |
| "grad_norm": 14.389258634685925, | |
| "learning_rate": 2.978723404255319e-07, | |
| "logits/chosen": -1.489843726158142, | |
| "logits/rejected": -1.4617187976837158, | |
| "logps/chosen": -0.73046875, | |
| "logps/rejected": -0.7445312738418579, | |
| "loss": 1.6841, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": -1.825781226158142, | |
| "rewards/margins": 0.03479614108800888, | |
| "rewards/rejected": -1.860937476158142, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.042735042735042736, | |
| "grad_norm": 8.572904416071315, | |
| "learning_rate": 4.0425531914893614e-07, | |
| "logits/chosen": -1.4171874523162842, | |
| "logits/rejected": -1.403906226158142, | |
| "logps/chosen": -0.71875, | |
| "logps/rejected": -0.80859375, | |
| "loss": 1.6713, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.5093749761581421, | |
| "rewards/chosen": -1.7960937023162842, | |
| "rewards/margins": 0.22412109375, | |
| "rewards/rejected": -2.0218749046325684, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.053418803418803416, | |
| "grad_norm": 12.085534263291942, | |
| "learning_rate": 5.106382978723403e-07, | |
| "logits/chosen": -1.471093773841858, | |
| "logits/rejected": -1.44921875, | |
| "logps/chosen": -0.6435546875, | |
| "logps/rejected": -0.68359375, | |
| "loss": 1.6299, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.526562511920929, | |
| "rewards/chosen": -1.610937476158142, | |
| "rewards/margins": 0.09737396240234375, | |
| "rewards/rejected": -1.7078125476837158, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.0641025641025641, | |
| "grad_norm": 14.051949061509276, | |
| "learning_rate": 6.170212765957446e-07, | |
| "logits/chosen": -1.46484375, | |
| "logits/rejected": -1.4367187023162842, | |
| "logps/chosen": -0.5189453363418579, | |
| "logps/rejected": -0.5455077886581421, | |
| "loss": 1.5902, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.4984374940395355, | |
| "rewards/chosen": -1.296875, | |
| "rewards/margins": 0.06621094048023224, | |
| "rewards/rejected": -1.3640625476837158, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.07478632478632478, | |
| "grad_norm": 15.304062972124502, | |
| "learning_rate": 7.23404255319149e-07, | |
| "logits/chosen": -1.497656226158142, | |
| "logits/rejected": -1.46875, | |
| "logps/chosen": -0.4546875059604645, | |
| "logps/rejected": -0.47148436307907104, | |
| "loss": 1.6006, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": -1.135156273841858, | |
| "rewards/margins": 0.04415283352136612, | |
| "rewards/rejected": -1.1796875, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.08547008547008547, | |
| "grad_norm": 19.104391719681082, | |
| "learning_rate": 8.297872340425532e-07, | |
| "logits/chosen": -1.5343749523162842, | |
| "logits/rejected": -1.485937476158142, | |
| "logps/chosen": -0.41132813692092896, | |
| "logps/rejected": -0.4263671934604645, | |
| "loss": 1.5897, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.520312488079071, | |
| "rewards/chosen": -1.0285155773162842, | |
| "rewards/margins": 0.03793945163488388, | |
| "rewards/rejected": -1.06640625, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.09615384615384616, | |
| "grad_norm": 8.645908098858397, | |
| "learning_rate": 9.361702127659575e-07, | |
| "logits/chosen": -1.557031273841858, | |
| "logits/rejected": -1.52734375, | |
| "logps/chosen": -0.3822265565395355, | |
| "logps/rejected": -0.4048828184604645, | |
| "loss": 1.5691, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.955078125, | |
| "rewards/margins": 0.05756836012005806, | |
| "rewards/rejected": -1.0128905773162842, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.10683760683760683, | |
| "grad_norm": 9.00331703799887, | |
| "learning_rate": 9.999443163759668e-07, | |
| "logits/chosen": -1.497656226158142, | |
| "logits/rejected": -1.4695312976837158, | |
| "logps/chosen": -0.3783203065395355, | |
| "logps/rejected": -0.421875, | |
| "loss": 1.5388, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.535937488079071, | |
| "rewards/chosen": -0.946093738079071, | |
| "rewards/margins": 0.10857544094324112, | |
| "rewards/rejected": -1.055078148841858, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.11752136752136752, | |
| "grad_norm": 12.767356159141897, | |
| "learning_rate": 9.993180180337126e-07, | |
| "logits/chosen": -1.4953124523162842, | |
| "logits/rejected": -1.46875, | |
| "logps/chosen": -0.3998046815395355, | |
| "logps/rejected": -0.4404296875, | |
| "loss": 1.5484, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.5296875238418579, | |
| "rewards/chosen": -1.0007812976837158, | |
| "rewards/margins": 0.10200195014476776, | |
| "rewards/rejected": -1.1015625, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.1282051282051282, | |
| "grad_norm": 10.195545822550427, | |
| "learning_rate": 9.979966915051516e-07, | |
| "logits/chosen": -1.52734375, | |
| "logits/rejected": -1.4914062023162842, | |
| "logps/chosen": -0.41523438692092896, | |
| "logps/rejected": -0.4541015625, | |
| "loss": 1.5552, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.5390625, | |
| "rewards/chosen": -1.0398437976837158, | |
| "rewards/margins": 0.0967559814453125, | |
| "rewards/rejected": -1.135156273841858, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.1388888888888889, | |
| "grad_norm": 10.716342096548614, | |
| "learning_rate": 9.959821760172848e-07, | |
| "logits/chosen": -1.506250023841858, | |
| "logits/rejected": -1.466406226158142, | |
| "logps/chosen": -0.3990234434604645, | |
| "logps/rejected": -0.44023436307907104, | |
| "loss": 1.5528, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.999218761920929, | |
| "rewards/margins": 0.10208740085363388, | |
| "rewards/rejected": -1.099609375, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.14957264957264957, | |
| "grad_norm": 18.26340093274305, | |
| "learning_rate": 9.932772756849152e-07, | |
| "logits/chosen": -1.532812476158142, | |
| "logits/rejected": -1.49609375, | |
| "logps/chosen": -0.4326171875, | |
| "logps/rejected": -0.49980467557907104, | |
| "loss": 1.5508, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -1.080468773841858, | |
| "rewards/margins": 0.16822509467601776, | |
| "rewards/rejected": -1.25, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.16025641025641027, | |
| "grad_norm": 21.259278645333858, | |
| "learning_rate": 9.898857556074466e-07, | |
| "logits/chosen": -1.5671875476837158, | |
| "logits/rejected": -1.549218773841858, | |
| "logps/chosen": -0.44921875, | |
| "logps/rejected": -0.5394531488418579, | |
| "loss": 1.505, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": -1.123437523841858, | |
| "rewards/margins": 0.22548827528953552, | |
| "rewards/rejected": -1.3484375476837158, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.17094017094017094, | |
| "grad_norm": 12.244653076137343, | |
| "learning_rate": 9.858123366280356e-07, | |
| "logits/chosen": -1.588281273841858, | |
| "logits/rejected": -1.572656273841858, | |
| "logps/chosen": -0.4935546815395355, | |
| "logps/rejected": -0.571093738079071, | |
| "loss": 1.5305, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.5234375, | |
| "rewards/chosen": -1.232812523841858, | |
| "rewards/margins": 0.19204100966453552, | |
| "rewards/rejected": -1.4265625476837158, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.18162393162393162, | |
| "grad_norm": 10.444731750558073, | |
| "learning_rate": 9.810626887623898e-07, | |
| "logits/chosen": -1.5390625, | |
| "logits/rejected": -1.5109374523162842, | |
| "logps/chosen": -0.4673828184604645, | |
| "logps/rejected": -0.542773425579071, | |
| "loss": 1.5526, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.526562511920929, | |
| "rewards/chosen": -1.170312523841858, | |
| "rewards/margins": 0.18715819716453552, | |
| "rewards/rejected": -1.357031226158142, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.19230769230769232, | |
| "grad_norm": 10.370326525798975, | |
| "learning_rate": 9.756434233063615e-07, | |
| "logits/chosen": -1.525781273841858, | |
| "logits/rejected": -1.489843726158142, | |
| "logps/chosen": -0.40058594942092896, | |
| "logps/rejected": -0.46113282442092896, | |
| "loss": 1.5482, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.559374988079071, | |
| "rewards/chosen": -1.001562476158142, | |
| "rewards/margins": 0.15218810737133026, | |
| "rewards/rejected": -1.1535155773162842, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.202991452991453, | |
| "grad_norm": 11.39793809106751, | |
| "learning_rate": 9.695620836333219e-07, | |
| "logits/chosen": -1.575781226158142, | |
| "logits/rejected": -1.5671875476837158, | |
| "logps/chosen": -0.3656249940395355, | |
| "logps/rejected": -0.4404296875, | |
| "loss": 1.5179, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": -0.9140625, | |
| "rewards/margins": 0.18659667670726776, | |
| "rewards/rejected": -1.1007812023162842, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.21367521367521367, | |
| "grad_norm": 13.822852555772993, | |
| "learning_rate": 9.628271346941252e-07, | |
| "logits/chosen": -1.564062476158142, | |
| "logits/rejected": -1.517968773841858, | |
| "logps/chosen": -0.3890624940395355, | |
| "logps/rejected": -0.4488281309604645, | |
| "loss": 1.5532, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.534375011920929, | |
| "rewards/chosen": -0.9730468988418579, | |
| "rewards/margins": 0.14824219048023224, | |
| "rewards/rejected": -1.120703101158142, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.22435897435897437, | |
| "grad_norm": 12.7396003923963, | |
| "learning_rate": 9.554479512342783e-07, | |
| "logits/chosen": -1.533593773841858, | |
| "logits/rejected": -1.498437523841858, | |
| "logps/chosen": -0.3792968690395355, | |
| "logps/rejected": -0.4404296875, | |
| "loss": 1.538, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.542187511920929, | |
| "rewards/chosen": -0.9476562738418579, | |
| "rewards/margins": 0.15297850966453552, | |
| "rewards/rejected": -1.1003906726837158, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.23504273504273504, | |
| "grad_norm": 21.261402234301844, | |
| "learning_rate": 9.474348047447176e-07, | |
| "logits/chosen": -1.568750023841858, | |
| "logits/rejected": -1.5234375, | |
| "logps/chosen": -0.38749998807907104, | |
| "logps/rejected": -0.43574219942092896, | |
| "loss": 1.546, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.528124988079071, | |
| "rewards/chosen": -0.967578113079071, | |
| "rewards/margins": 0.12104491889476776, | |
| "rewards/rejected": -1.0890624523162842, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.24572649572649571, | |
| "grad_norm": 17.398796131106096, | |
| "learning_rate": 9.387988491643557e-07, | |
| "logits/chosen": -1.55859375, | |
| "logits/rejected": -1.540624976158142, | |
| "logps/chosen": -0.4195312559604645, | |
| "logps/rejected": -0.503710925579071, | |
| "loss": 1.5278, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -1.048437476158142, | |
| "rewards/margins": 0.21040038764476776, | |
| "rewards/rejected": -1.2585937976837158, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.2564102564102564, | |
| "grad_norm": 11.87400607253576, | |
| "learning_rate": 9.295521053543019e-07, | |
| "logits/chosen": -1.583593726158142, | |
| "logits/rejected": -1.571874976158142, | |
| "logps/chosen": -0.439453125, | |
| "logps/rejected": -0.5162109136581421, | |
| "loss": 1.519, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.573437511920929, | |
| "rewards/chosen": -1.0988280773162842, | |
| "rewards/margins": 0.19086913764476776, | |
| "rewards/rejected": -1.2882812023162842, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.2670940170940171, | |
| "grad_norm": 11.436299073464317, | |
| "learning_rate": 9.197074443653642e-07, | |
| "logits/chosen": -1.58984375, | |
| "logits/rejected": -1.56640625, | |
| "logps/chosen": -0.5277343988418579, | |
| "logps/rejected": -0.623828113079071, | |
| "loss": 1.5477, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -1.318750023841858, | |
| "rewards/margins": 0.24033813178539276, | |
| "rewards/rejected": -1.5593750476837158, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.2777777777777778, | |
| "grad_norm": 14.626979030522325, | |
| "learning_rate": 9.09278569522127e-07, | |
| "logits/chosen": -1.5968749523162842, | |
| "logits/rejected": -1.546875, | |
| "logps/chosen": -0.48027342557907104, | |
| "logps/rejected": -0.6021484136581421, | |
| "loss": 1.4972, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -1.200781226158142, | |
| "rewards/margins": 0.304443359375, | |
| "rewards/rejected": -1.505468726158142, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.28846153846153844, | |
| "grad_norm": 11.845040694170025, | |
| "learning_rate": 8.982799973485406e-07, | |
| "logits/chosen": -1.5703125, | |
| "logits/rejected": -1.536718726158142, | |
| "logps/chosen": -0.4761718809604645, | |
| "logps/rejected": -0.6029297113418579, | |
| "loss": 1.4762, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.5765625238418579, | |
| "rewards/chosen": -1.191015601158142, | |
| "rewards/margins": 0.31770020723342896, | |
| "rewards/rejected": -1.509374976158142, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.29914529914529914, | |
| "grad_norm": 13.92232790418488, | |
| "learning_rate": 8.867270373615734e-07, | |
| "logits/chosen": -1.5460937023162842, | |
| "logits/rejected": -1.521875023841858, | |
| "logps/chosen": -0.4703125059604645, | |
| "logps/rejected": -0.5712890625, | |
| "loss": 1.5045, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.5843750238418579, | |
| "rewards/chosen": -1.1749999523162842, | |
| "rewards/margins": 0.2523437440395355, | |
| "rewards/rejected": -1.4289062023162842, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.30982905982905984, | |
| "grad_norm": 12.635415703289459, | |
| "learning_rate": 8.746357707610543e-07, | |
| "logits/chosen": -1.5359375476837158, | |
| "logits/rejected": -1.505468726158142, | |
| "logps/chosen": -0.4593749940395355, | |
| "logps/rejected": -0.563671886920929, | |
| "loss": 1.5233, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.582812488079071, | |
| "rewards/chosen": -1.1476562023162842, | |
| "rewards/margins": 0.25996094942092896, | |
| "rewards/rejected": -1.4093749523162842, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.32051282051282054, | |
| "grad_norm": 11.154844952921092, | |
| "learning_rate": 8.620230280453671e-07, | |
| "logits/chosen": -1.5203125476837158, | |
| "logits/rejected": -1.4921875, | |
| "logps/chosen": -0.48515623807907104, | |
| "logps/rejected": -0.586718738079071, | |
| "loss": 1.4909, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.6109374761581421, | |
| "rewards/chosen": -1.213281273841858, | |
| "rewards/margins": 0.2548828125, | |
| "rewards/rejected": -1.46875, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.3311965811965812, | |
| "grad_norm": 18.4281131810382, | |
| "learning_rate": 8.48906365584155e-07, | |
| "logits/chosen": -1.529687523841858, | |
| "logits/rejected": -1.4921875, | |
| "logps/chosen": -0.4828124940395355, | |
| "logps/rejected": -0.6187499761581421, | |
| "loss": 1.4699, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": -1.2082030773162842, | |
| "rewards/margins": 0.3387695252895355, | |
| "rewards/rejected": -1.545312523841858, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.3418803418803419, | |
| "grad_norm": 13.615385650176293, | |
| "learning_rate": 8.353040411806447e-07, | |
| "logits/chosen": -1.544531226158142, | |
| "logits/rejected": -1.5125000476837158, | |
| "logps/chosen": -0.5599609613418579, | |
| "logps/rejected": -0.6734374761581421, | |
| "loss": 1.5053, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.5609375238418579, | |
| "rewards/chosen": -1.3992187976837158, | |
| "rewards/margins": 0.2828125059604645, | |
| "rewards/rejected": -1.6828124523162842, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.3525641025641026, | |
| "grad_norm": 26.41527109835978, | |
| "learning_rate": 8.212349886576069e-07, | |
| "logits/chosen": -1.600000023841858, | |
| "logits/rejected": -1.5515625476837158, | |
| "logps/chosen": -0.535937488079071, | |
| "logps/rejected": -0.670703113079071, | |
| "loss": 1.477, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.604687511920929, | |
| "rewards/chosen": -1.33984375, | |
| "rewards/margins": 0.3355468809604645, | |
| "rewards/rejected": -1.6749999523162842, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.36324786324786323, | |
| "grad_norm": 13.57171264424383, | |
| "learning_rate": 8.067187915023281e-07, | |
| "logits/chosen": -1.553125023841858, | |
| "logits/rejected": -1.5265624523162842, | |
| "logps/chosen": -0.5589843988418579, | |
| "logps/rejected": -0.6937500238418579, | |
| "loss": 1.471, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.567187488079071, | |
| "rewards/chosen": -1.3984375, | |
| "rewards/margins": 0.3372802734375, | |
| "rewards/rejected": -1.735937476158142, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.37393162393162394, | |
| "grad_norm": 19.506176492341762, | |
| "learning_rate": 7.91775655607279e-07, | |
| "logits/chosen": -1.560156226158142, | |
| "logits/rejected": -1.541406273841858, | |
| "logps/chosen": -0.583984375, | |
| "logps/rejected": -0.7816406488418579, | |
| "loss": 1.4303, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": -1.4617187976837158, | |
| "rewards/margins": 0.49335938692092896, | |
| "rewards/rejected": -1.954687476158142, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.38461538461538464, | |
| "grad_norm": 21.640230091832745, | |
| "learning_rate": 7.764263811444214e-07, | |
| "logits/chosen": -1.5851562023162842, | |
| "logits/rejected": -1.5539062023162842, | |
| "logps/chosen": -0.5960937738418579, | |
| "logps/rejected": -0.7250000238418579, | |
| "loss": 1.4788, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.598437488079071, | |
| "rewards/chosen": -1.490625023841858, | |
| "rewards/margins": 0.322265625, | |
| "rewards/rejected": -1.8125, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.3952991452991453, | |
| "grad_norm": 17.283034944275915, | |
| "learning_rate": 7.606923336123069e-07, | |
| "logits/chosen": -1.546875, | |
| "logits/rejected": -1.52734375, | |
| "logps/chosen": -0.6265624761581421, | |
| "logps/rejected": -0.7578125, | |
| "loss": 1.5019, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/chosen": -1.5656249523162842, | |
| "rewards/margins": 0.328369140625, | |
| "rewards/rejected": -1.89453125, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.405982905982906, | |
| "grad_norm": 16.927291550124732, | |
| "learning_rate": 7.445954140962649e-07, | |
| "logits/chosen": -1.595312476158142, | |
| "logits/rejected": -1.564843773841858, | |
| "logps/chosen": -0.592578113079071, | |
| "logps/rejected": -0.792187511920929, | |
| "loss": 1.4263, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.629687488079071, | |
| "rewards/chosen": -1.4835937023162842, | |
| "rewards/margins": 0.49609375, | |
| "rewards/rejected": -1.9796874523162842, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.4166666666666667, | |
| "grad_norm": 19.885105063023072, | |
| "learning_rate": 7.28158028783079e-07, | |
| "logits/chosen": -1.5578124523162842, | |
| "logits/rejected": -1.5304687023162842, | |
| "logps/chosen": -0.60546875, | |
| "logps/rejected": -0.7406250238418579, | |
| "loss": 1.4749, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.596875011920929, | |
| "rewards/chosen": -1.513281226158142, | |
| "rewards/margins": 0.33740234375, | |
| "rewards/rejected": -1.8523437976837158, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.42735042735042733, | |
| "grad_norm": 16.181127748785606, | |
| "learning_rate": 7.114030577725835e-07, | |
| "logits/chosen": -1.576562523841858, | |
| "logits/rejected": -1.541406273841858, | |
| "logps/chosen": -0.6292968988418579, | |
| "logps/rejected": -0.7671874761581421, | |
| "loss": 1.4646, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.5703125, | |
| "rewards/chosen": -1.5734374523162842, | |
| "rewards/margins": 0.34318846464157104, | |
| "rewards/rejected": -1.9171874523162842, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.43803418803418803, | |
| "grad_norm": 20.82169481298227, | |
| "learning_rate": 6.943538232295964e-07, | |
| "logits/chosen": -1.607812523841858, | |
| "logits/rejected": -1.5859375, | |
| "logps/chosen": -0.665234386920929, | |
| "logps/rejected": -0.823437511920929, | |
| "loss": 1.4523, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.6109374761581421, | |
| "rewards/chosen": -1.662500023841858, | |
| "rewards/margins": 0.3946289122104645, | |
| "rewards/rejected": -2.0562500953674316, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.44871794871794873, | |
| "grad_norm": 25.003069519753712, | |
| "learning_rate": 6.770340569205157e-07, | |
| "logits/chosen": -1.6203124523162842, | |
| "logits/rejected": -1.5812499523162842, | |
| "logps/chosen": -0.7738281488418579, | |
| "logps/rejected": -0.957812488079071, | |
| "loss": 1.4483, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.620312511920929, | |
| "rewards/chosen": -1.9343750476837158, | |
| "rewards/margins": 0.4595703184604645, | |
| "rewards/rejected": -2.393749952316284, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.4594017094017094, | |
| "grad_norm": 18.610256654997034, | |
| "learning_rate": 6.594678671797703e-07, | |
| "logits/chosen": -1.6007812023162842, | |
| "logits/rejected": -1.572656273841858, | |
| "logps/chosen": -0.77734375, | |
| "logps/rejected": -1.0222656726837158, | |
| "loss": 1.4027, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": -1.943750023841858, | |
| "rewards/margins": 0.610156238079071, | |
| "rewards/rejected": -2.5562500953674316, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.4700854700854701, | |
| "grad_norm": 22.948907291803067, | |
| "learning_rate": 6.416797053521038e-07, | |
| "logits/chosen": -1.588281273841858, | |
| "logits/rejected": -1.5539062023162842, | |
| "logps/chosen": -0.78515625, | |
| "logps/rejected": -0.969531238079071, | |
| "loss": 1.4565, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.6468750238418579, | |
| "rewards/chosen": -1.9609375, | |
| "rewards/margins": 0.45917969942092896, | |
| "rewards/rejected": -2.421875, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.4807692307692308, | |
| "grad_norm": 22.112843833650466, | |
| "learning_rate": 6.236943317574054e-07, | |
| "logits/chosen": -1.5476562976837158, | |
| "logits/rejected": -1.5203125476837158, | |
| "logps/chosen": -0.774609386920929, | |
| "logps/rejected": -0.942187488079071, | |
| "loss": 1.4957, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.609375, | |
| "rewards/chosen": -1.9367187023162842, | |
| "rewards/margins": 0.4192871153354645, | |
| "rewards/rejected": -2.356250047683716, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.49145299145299143, | |
| "grad_norm": 23.56709379174187, | |
| "learning_rate": 6.055367812254592e-07, | |
| "logits/chosen": -1.5867187976837158, | |
| "logits/rejected": -1.56640625, | |
| "logps/chosen": -0.749218761920929, | |
| "logps/rejected": -0.935546875, | |
| "loss": 1.4253, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.6156250238418579, | |
| "rewards/chosen": -1.8757812976837158, | |
| "rewards/margins": 0.46416014432907104, | |
| "rewards/rejected": -2.335156202316284, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.5021367521367521, | |
| "grad_norm": 22.251408755340613, | |
| "learning_rate": 5.872323282485888e-07, | |
| "logits/chosen": -1.5859375, | |
| "logits/rejected": -1.549218773841858, | |
| "logps/chosen": -0.7632812261581421, | |
| "logps/rejected": -0.934374988079071, | |
| "loss": 1.4625, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.609375, | |
| "rewards/chosen": -1.908593773841858, | |
| "rewards/margins": 0.4273437559604645, | |
| "rewards/rejected": -2.33203125, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.5128205128205128, | |
| "grad_norm": 20.6813266166916, | |
| "learning_rate": 5.688064518007035e-07, | |
| "logits/chosen": -1.575781226158142, | |
| "logits/rejected": -1.564843773841858, | |
| "logps/chosen": -0.7757812738418579, | |
| "logps/rejected": -0.98828125, | |
| "loss": 1.3784, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.653124988079071, | |
| "rewards/chosen": -1.9382812976837158, | |
| "rewards/margins": 0.53125, | |
| "rewards/rejected": -2.4703125953674316, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.5235042735042735, | |
| "grad_norm": 15.883505475500101, | |
| "learning_rate": 5.50284799871714e-07, | |
| "logits/chosen": -1.6062500476837158, | |
| "logits/rejected": -1.56640625, | |
| "logps/chosen": -0.735156238079071, | |
| "logps/rejected": -0.940234363079071, | |
| "loss": 1.4273, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.6078125238418579, | |
| "rewards/chosen": -1.838281273841858, | |
| "rewards/margins": 0.5125976800918579, | |
| "rewards/rejected": -2.3499999046325684, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.5341880341880342, | |
| "grad_norm": 25.5478315800542, | |
| "learning_rate": 5.316931537666856e-07, | |
| "logits/chosen": -1.61328125, | |
| "logits/rejected": -1.5828125476837158, | |
| "logps/chosen": -0.785937488079071, | |
| "logps/rejected": -0.9281250238418579, | |
| "loss": 1.4669, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.5921875238418579, | |
| "rewards/chosen": -1.96484375, | |
| "rewards/margins": 0.35576170682907104, | |
| "rewards/rejected": -2.319531202316284, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.5448717948717948, | |
| "grad_norm": 25.662663107269402, | |
| "learning_rate": 5.130573922194236e-07, | |
| "logits/chosen": -1.6218750476837158, | |
| "logits/rejected": -1.5945312976837158, | |
| "logps/chosen": -0.8207031488418579, | |
| "logps/rejected": -1.051171898841858, | |
| "loss": 1.3959, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.645312488079071, | |
| "rewards/chosen": -2.0492186546325684, | |
| "rewards/margins": 0.574414074420929, | |
| "rewards/rejected": -2.6234374046325684, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.5555555555555556, | |
| "grad_norm": 19.93174202171367, | |
| "learning_rate": 4.944034553704412e-07, | |
| "logits/chosen": -1.6171875, | |
| "logits/rejected": -1.6015625, | |
| "logps/chosen": -0.8949218988418579, | |
| "logps/rejected": -1.1492187976837158, | |
| "loss": 1.4123, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -2.235156297683716, | |
| "rewards/margins": 0.637499988079071, | |
| "rewards/rejected": -2.8734374046325684, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.5662393162393162, | |
| "grad_norm": 25.25500473771292, | |
| "learning_rate": 4.7575730865945284e-07, | |
| "logits/chosen": -1.5906250476837158, | |
| "logits/rejected": -1.568750023841858, | |
| "logps/chosen": -0.8773437738418579, | |
| "logps/rejected": -1.0632812976837158, | |
| "loss": 1.4437, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.651562511920929, | |
| "rewards/chosen": -2.1929688453674316, | |
| "rewards/margins": 0.4657226502895355, | |
| "rewards/rejected": -2.660937547683716, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.5769230769230769, | |
| "grad_norm": 26.727173719859465, | |
| "learning_rate": 4.5714490668265237e-07, | |
| "logits/chosen": -1.59375, | |
| "logits/rejected": -1.575781226158142, | |
| "logps/chosen": -0.891406238079071, | |
| "logps/rejected": -1.0867187976837158, | |
| "loss": 1.4468, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.609375, | |
| "rewards/chosen": -2.2281250953674316, | |
| "rewards/margins": 0.4881591796875, | |
| "rewards/rejected": -2.7171874046325684, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.5876068376068376, | |
| "grad_norm": 23.107407158744333, | |
| "learning_rate": 4.385921570650829e-07, | |
| "logits/chosen": -1.626562476158142, | |
| "logits/rejected": -1.6007812023162842, | |
| "logps/chosen": -0.846875011920929, | |
| "logps/rejected": -1.054296851158142, | |
| "loss": 1.3979, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.6734374761581421, | |
| "rewards/chosen": -2.116406202316284, | |
| "rewards/margins": 0.521728515625, | |
| "rewards/rejected": -2.637500047683716, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.5982905982905983, | |
| "grad_norm": 24.943044733477908, | |
| "learning_rate": 4.201248843983918e-07, | |
| "logits/chosen": -1.5890624523162842, | |
| "logits/rejected": -1.5554687976837158, | |
| "logps/chosen": -0.8343750238418579, | |
| "logps/rejected": -1.064062476158142, | |
| "loss": 1.3962, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.660937488079071, | |
| "rewards/chosen": -2.085156202316284, | |
| "rewards/margins": 0.572070300579071, | |
| "rewards/rejected": -2.65625, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.6089743589743589, | |
| "grad_norm": 20.205385409855005, | |
| "learning_rate": 4.0176879429416083e-07, | |
| "logits/chosen": -1.592187523841858, | |
| "logits/rejected": -1.544531226158142, | |
| "logps/chosen": -0.9039062261581421, | |
| "logps/rejected": -1.1015625, | |
| "loss": 1.4174, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -2.262500047683716, | |
| "rewards/margins": 0.4927734434604645, | |
| "rewards/rejected": -2.753124952316284, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.6196581196581197, | |
| "grad_norm": 37.04376569884301, | |
| "learning_rate": 3.8354943760285435e-07, | |
| "logits/chosen": -1.618749976158142, | |
| "logits/rejected": -1.6015625, | |
| "logps/chosen": -0.952343761920929, | |
| "logps/rejected": -1.205468773841858, | |
| "loss": 1.3805, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.6484375, | |
| "rewards/chosen": -2.378124952316284, | |
| "rewards/margins": 0.6346679925918579, | |
| "rewards/rejected": -3.0140624046325684, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.6303418803418803, | |
| "grad_norm": 28.57757441969063, | |
| "learning_rate": 3.6549217484818573e-07, | |
| "logits/chosen": -1.6015625, | |
| "logits/rejected": -1.5828125476837158, | |
| "logps/chosen": -0.987500011920929, | |
| "logps/rejected": -1.204687476158142, | |
| "loss": 1.4307, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.6484375, | |
| "rewards/chosen": -2.46875, | |
| "rewards/margins": 0.5419921875, | |
| "rewards/rejected": -3.012500047683716, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.6410256410256411, | |
| "grad_norm": 26.97457548170502, | |
| "learning_rate": 3.4762214092641096e-07, | |
| "logits/chosen": -1.5906250476837158, | |
| "logits/rejected": -1.56640625, | |
| "logps/chosen": -0.912109375, | |
| "logps/rejected": -1.1707031726837158, | |
| "loss": 1.3817, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.635937511920929, | |
| "rewards/chosen": -2.2796874046325684, | |
| "rewards/margins": 0.6458984613418579, | |
| "rewards/rejected": -2.926562547683716, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.6517094017094017, | |
| "grad_norm": 18.40872102433568, | |
| "learning_rate": 3.299642101196854e-07, | |
| "logits/chosen": -1.626562476158142, | |
| "logits/rejected": -1.6007812023162842, | |
| "logps/chosen": -0.8511718511581421, | |
| "logps/rejected": -1.115234375, | |
| "loss": 1.3859, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.6421874761581421, | |
| "rewards/chosen": -2.1273436546325684, | |
| "rewards/margins": 0.660351574420929, | |
| "rewards/rejected": -2.7890625, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.6623931623931624, | |
| "grad_norm": 48.83431435177251, | |
| "learning_rate": 3.125429614721842e-07, | |
| "logits/chosen": -1.5773437023162842, | |
| "logits/rejected": -1.5656249523162842, | |
| "logps/chosen": -0.9214843511581421, | |
| "logps/rejected": -1.154296875, | |
| "loss": 1.4462, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.628125011920929, | |
| "rewards/chosen": -2.3023438453674316, | |
| "rewards/margins": 0.5805908441543579, | |
| "rewards/rejected": -2.8843750953674316, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.6730769230769231, | |
| "grad_norm": 26.010483998640073, | |
| "learning_rate": 2.953826445771788e-07, | |
| "logits/chosen": -1.584375023841858, | |
| "logits/rejected": -1.5671875476837158, | |
| "logps/chosen": -0.907031238079071, | |
| "logps/rejected": -1.125, | |
| "loss": 1.4235, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": -2.2679686546325684, | |
| "rewards/margins": 0.5430663824081421, | |
| "rewards/rejected": -2.8125, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.6837606837606838, | |
| "grad_norm": 25.722431614221733, | |
| "learning_rate": 2.785071458226972e-07, | |
| "logits/chosen": -1.592187523841858, | |
| "logits/rejected": -1.5617187023162842, | |
| "logps/chosen": -0.90625, | |
| "logps/rejected": -1.146093726158142, | |
| "loss": 1.3948, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.640625, | |
| "rewards/chosen": -2.2671875953674316, | |
| "rewards/margins": 0.595898449420929, | |
| "rewards/rejected": -2.864062547683716, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.6944444444444444, | |
| "grad_norm": 27.64784221370248, | |
| "learning_rate": 2.6193995514274705e-07, | |
| "logits/chosen": -1.5851562023162842, | |
| "logits/rejected": -1.5625, | |
| "logps/chosen": -0.9371093511581421, | |
| "logps/rejected": -1.185156226158142, | |
| "loss": 1.3706, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.6546875238418579, | |
| "rewards/chosen": -2.3414063453674316, | |
| "rewards/margins": 0.6197265386581421, | |
| "rewards/rejected": -2.9625000953674316, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.7051282051282052, | |
| "grad_norm": 26.29403948884724, | |
| "learning_rate": 2.457041333203852e-07, | |
| "logits/chosen": -1.607812523841858, | |
| "logits/rejected": -1.58984375, | |
| "logps/chosen": -0.973437488079071, | |
| "logps/rejected": -1.2390625476837158, | |
| "loss": 1.397, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.640625, | |
| "rewards/chosen": -2.434375047683716, | |
| "rewards/margins": 0.66748046875, | |
| "rewards/rejected": -3.1015625, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.7158119658119658, | |
| "grad_norm": 20.070403111951517, | |
| "learning_rate": 2.2982227988814796e-07, | |
| "logits/chosen": -1.5625, | |
| "logits/rejected": -1.5578124523162842, | |
| "logps/chosen": -1.002343773841858, | |
| "logps/rejected": -1.299218773841858, | |
| "loss": 1.3534, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.6578124761581421, | |
| "rewards/chosen": -2.503124952316284, | |
| "rewards/margins": 0.74609375, | |
| "rewards/rejected": -3.246875047683716, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.7264957264957265, | |
| "grad_norm": 21.27637529256292, | |
| "learning_rate": 2.1431650167051918e-07, | |
| "logits/chosen": -1.6203124523162842, | |
| "logits/rejected": -1.599218726158142, | |
| "logps/chosen": -1.0, | |
| "logps/rejected": -1.2265625, | |
| "loss": 1.4229, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.6484375, | |
| "rewards/chosen": -2.5, | |
| "rewards/margins": 0.564257800579071, | |
| "rewards/rejected": -3.065624952316284, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.7371794871794872, | |
| "grad_norm": 26.660141655685035, | |
| "learning_rate": 1.992083820122259e-07, | |
| "logits/chosen": -1.5859375, | |
| "logits/rejected": -1.578125, | |
| "logps/chosen": -0.92578125, | |
| "logps/rejected": -1.1687500476837158, | |
| "loss": 1.3553, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.6968749761581421, | |
| "rewards/chosen": -2.315624952316284, | |
| "rewards/margins": 0.6068359613418579, | |
| "rewards/rejected": -2.9203124046325684, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.7478632478632479, | |
| "grad_norm": 28.652969469341908, | |
| "learning_rate": 1.845189507351964e-07, | |
| "logits/chosen": -1.5859375, | |
| "logits/rejected": -1.56640625, | |
| "logps/chosen": -0.8785156011581421, | |
| "logps/rejected": -1.14453125, | |
| "loss": 1.3487, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.6703125238418579, | |
| "rewards/chosen": -2.1968750953674316, | |
| "rewards/margins": 0.663867175579071, | |
| "rewards/rejected": -2.864062547683716, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.7585470085470085, | |
| "grad_norm": 31.505789159719555, | |
| "learning_rate": 1.7026865486599374e-07, | |
| "logits/chosen": -1.603906273841858, | |
| "logits/rejected": -1.5734374523162842, | |
| "logps/chosen": -1.0070312023162842, | |
| "logps/rejected": -1.28515625, | |
| "loss": 1.3709, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.6343749761581421, | |
| "rewards/chosen": -2.518749952316284, | |
| "rewards/margins": 0.6943359375, | |
| "rewards/rejected": -3.2125000953674316, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.7692307692307693, | |
| "grad_norm": 26.452453101979053, | |
| "learning_rate": 1.564773301744774e-07, | |
| "logits/chosen": -1.5984375476837158, | |
| "logits/rejected": -1.571874976158142, | |
| "logps/chosen": -0.9917968511581421, | |
| "logps/rejected": -1.2703125476837158, | |
| "loss": 1.3662, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.653124988079071, | |
| "rewards/chosen": -2.4765625, | |
| "rewards/margins": 0.6996093988418579, | |
| "rewards/rejected": -3.1781249046325684, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.7799145299145299, | |
| "grad_norm": 22.60419152131841, | |
| "learning_rate": 1.431641735633044e-07, | |
| "logits/chosen": -1.62109375, | |
| "logits/rejected": -1.5906250476837158, | |
| "logps/chosen": -1.000390648841858, | |
| "logps/rejected": -1.2742187976837158, | |
| "loss": 1.3257, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -2.5, | |
| "rewards/margins": 0.6851562261581421, | |
| "rewards/rejected": -3.184375047683716, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.7905982905982906, | |
| "grad_norm": 31.959715203438616, | |
| "learning_rate": 1.30347716346706e-07, | |
| "logits/chosen": -1.587499976158142, | |
| "logits/rejected": -1.5554687976837158, | |
| "logps/chosen": -1.006250023841858, | |
| "logps/rejected": -1.2453124523162842, | |
| "loss": 1.3749, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.6578124761581421, | |
| "rewards/chosen": -2.515625, | |
| "rewards/margins": 0.598828136920929, | |
| "rewards/rejected": -3.112499952316284, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.8012820512820513, | |
| "grad_norm": 26.131565932019587, | |
| "learning_rate": 1.1804579845573287e-07, | |
| "logits/chosen": -1.5859375, | |
| "logits/rejected": -1.5625, | |
| "logps/chosen": -0.985156238079071, | |
| "logps/rejected": -1.212499976158142, | |
| "loss": 1.408, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.6390625238418579, | |
| "rewards/chosen": -2.465625047683716, | |
| "rewards/margins": 0.566210925579071, | |
| "rewards/rejected": -3.0296874046325684, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.811965811965812, | |
| "grad_norm": 22.064228308170097, | |
| "learning_rate": 1.0627554360587532e-07, | |
| "logits/chosen": -1.6296875476837158, | |
| "logits/rejected": -1.6007812023162842, | |
| "logps/chosen": -1.041406273841858, | |
| "logps/rejected": -1.36328125, | |
| "loss": 1.3429, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -2.6031250953674316, | |
| "rewards/margins": 0.803906261920929, | |
| "rewards/rejected": -3.4078125953674316, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.8226495726495726, | |
| "grad_norm": 21.960589810609097, | |
| "learning_rate": 9.50533354616217e-08, | |
| "logits/chosen": -1.607031226158142, | |
| "logits/rejected": -1.58984375, | |
| "logps/chosen": -1.0390625, | |
| "logps/rejected": -1.306249976158142, | |
| "loss": 1.3265, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.6953125, | |
| "rewards/chosen": -2.5953125953674316, | |
| "rewards/margins": 0.6675781011581421, | |
| "rewards/rejected": -3.262500047683716, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.8333333333333334, | |
| "grad_norm": 31.587909830608158, | |
| "learning_rate": 8.439479483113682e-08, | |
| "logits/chosen": -1.610937476158142, | |
| "logits/rejected": -1.58203125, | |
| "logps/chosen": -1.0402343273162842, | |
| "logps/rejected": -1.338281273841858, | |
| "loss": 1.3555, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.6859375238418579, | |
| "rewards/chosen": -2.598437547683716, | |
| "rewards/margins": 0.7417968511581421, | |
| "rewards/rejected": -3.339062452316284, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.844017094017094, | |
| "grad_norm": 22.55165739610421, | |
| "learning_rate": 7.431475792280017e-08, | |
| "logits/chosen": -1.5867187976837158, | |
| "logits/rejected": -1.5656249523162842, | |
| "logps/chosen": -1.017187476158142, | |
| "logps/rejected": -1.252343773841858, | |
| "loss": 1.3629, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.659375011920929, | |
| "rewards/chosen": -2.543750047683716, | |
| "rewards/margins": 0.588671863079071, | |
| "rewards/rejected": -3.1328125, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.8547008547008547, | |
| "grad_norm": 19.84309936265848, | |
| "learning_rate": 6.482725569387171e-08, | |
| "logits/chosen": -1.6015625, | |
| "logits/rejected": -1.5812499523162842, | |
| "logps/chosen": -1.046484351158142, | |
| "logps/rejected": -1.322656273841858, | |
| "loss": 1.3524, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.6703125238418579, | |
| "rewards/chosen": -2.6171875, | |
| "rewards/margins": 0.6898437738418579, | |
| "rewards/rejected": -3.3062500953674316, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.8547008547008547, | |
| "eval_logits/chosen": -1.4735382795333862, | |
| "eval_logits/rejected": -1.4497227668762207, | |
| "eval_logps/chosen": -1.0328880548477173, | |
| "eval_logps/rejected": -1.3099168539047241, | |
| "eval_loss": 1.3493971824645996, | |
| "eval_nll_loss": 0.0, | |
| "eval_rewards/accuracies": 0.6670587062835693, | |
| "eval_rewards/chosen": -2.5819051265716553, | |
| "eval_rewards/margins": 0.6942256689071655, | |
| "eval_rewards/rejected": -3.276461601257324, | |
| "eval_runtime": 26.5068, | |
| "eval_samples_per_second": 73.981, | |
| "eval_steps_per_second": 2.339, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.8653846153846154, | |
| "grad_norm": 26.041292961951424, | |
| "learning_rate": 5.594549432003243e-08, | |
| "logits/chosen": -1.631250023841858, | |
| "logits/rejected": -1.59375, | |
| "logps/chosen": -1.0828125476837158, | |
| "logps/rejected": -1.334375023841858, | |
| "loss": 1.3903, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -2.7093749046325684, | |
| "rewards/margins": 0.6244140863418579, | |
| "rewards/rejected": -3.332812547683716, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.8760683760683761, | |
| "grad_norm": 32.518651766511766, | |
| "learning_rate": 4.76818368129821e-08, | |
| "logits/chosen": -1.6015625, | |
| "logits/rejected": -1.571874976158142, | |
| "logps/chosen": -1.058984398841858, | |
| "logps/rejected": -1.3125, | |
| "loss": 1.3631, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.660937488079071, | |
| "rewards/chosen": -2.6484375, | |
| "rewards/margins": 0.633984386920929, | |
| "rewards/rejected": -3.2828125953674316, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.8867521367521367, | |
| "grad_norm": 23.06725788674345, | |
| "learning_rate": 4.004778581168411e-08, | |
| "logits/chosen": -1.588281273841858, | |
| "logits/rejected": -1.572656273841858, | |
| "logps/chosen": -1.028906226158142, | |
| "logps/rejected": -1.3125, | |
| "loss": 1.3308, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -2.5703125, | |
| "rewards/margins": 0.7134765386581421, | |
| "rewards/rejected": -3.284374952316284, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.8974358974358975, | |
| "grad_norm": 24.18638777242838, | |
| "learning_rate": 3.305396757121037e-08, | |
| "logits/chosen": -1.62890625, | |
| "logits/rejected": -1.6124999523162842, | |
| "logps/chosen": -1.075781226158142, | |
| "logps/rejected": -1.33984375, | |
| "loss": 1.3695, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -2.6875, | |
| "rewards/margins": 0.6610351800918579, | |
| "rewards/rejected": -3.346874952316284, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.9081196581196581, | |
| "grad_norm": 30.971842967518672, | |
| "learning_rate": 2.6710117171472757e-08, | |
| "logits/chosen": -1.571874976158142, | |
| "logits/rejected": -1.5421874523162842, | |
| "logps/chosen": -1.041015625, | |
| "logps/rejected": -1.30859375, | |
| "loss": 1.3354, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -2.6015625, | |
| "rewards/margins": 0.6728515625, | |
| "rewards/rejected": -3.2734375, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.9188034188034188, | |
| "grad_norm": 24.277716317235853, | |
| "learning_rate": 2.1025064966430694e-08, | |
| "logits/chosen": -1.627343773841858, | |
| "logits/rejected": -1.610937476158142, | |
| "logps/chosen": -1.0441405773162842, | |
| "logps/rejected": -1.3624999523162842, | |
| "loss": 1.3138, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.671875, | |
| "rewards/chosen": -2.612499952316284, | |
| "rewards/margins": 0.792187511920929, | |
| "rewards/rejected": -3.401562452316284, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.9294871794871795, | |
| "grad_norm": 24.66405837343724, | |
| "learning_rate": 1.6006724292636166e-08, | |
| "logits/chosen": -1.6257812976837158, | |
| "logits/rejected": -1.6171875, | |
| "logps/chosen": -1.050390601158142, | |
| "logps/rejected": -1.3406250476837158, | |
| "loss": 1.3571, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.6781250238418579, | |
| "rewards/chosen": -2.6265625953674316, | |
| "rewards/margins": 0.724414050579071, | |
| "rewards/rejected": -3.3515625, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.9401709401709402, | |
| "grad_norm": 25.638182759285908, | |
| "learning_rate": 1.1662080454225509e-08, | |
| "logits/chosen": -1.5812499523162842, | |
| "logits/rejected": -1.5578124523162842, | |
| "logps/chosen": -1.0617187023162842, | |
| "logps/rejected": -1.337499976158142, | |
| "loss": 1.3339, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.692187488079071, | |
| "rewards/chosen": -2.653125047683716, | |
| "rewards/margins": 0.6910156011581421, | |
| "rewards/rejected": -3.3453125953674316, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.9508547008547008, | |
| "grad_norm": 26.16854399233931, | |
| "learning_rate": 7.9971809996911e-09, | |
| "logits/chosen": -1.61328125, | |
| "logits/rejected": -1.603124976158142, | |
| "logps/chosen": -1.072656273841858, | |
| "logps/rejected": -1.350000023841858, | |
| "loss": 1.3319, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.6859375238418579, | |
| "rewards/chosen": -2.6781249046325684, | |
| "rewards/margins": 0.694140613079071, | |
| "rewards/rejected": -3.371875047683716, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.9615384615384616, | |
| "grad_norm": 26.197163635860893, | |
| "learning_rate": 5.017127303966084e-09, | |
| "logits/chosen": -1.6023437976837158, | |
| "logits/rejected": -1.5703125, | |
| "logps/chosen": -1.081640601158142, | |
| "logps/rejected": -1.359375, | |
| "loss": 1.3163, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.676562488079071, | |
| "rewards/chosen": -2.703125, | |
| "rewards/margins": 0.699023425579071, | |
| "rewards/rejected": -3.4046874046325684, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.9722222222222222, | |
| "grad_norm": 27.97092955661895, | |
| "learning_rate": 2.7260674675404496e-09, | |
| "logits/chosen": -1.62109375, | |
| "logits/rejected": -1.591406226158142, | |
| "logps/chosen": -1.099218726158142, | |
| "logps/rejected": -1.3468749523162842, | |
| "loss": 1.4061, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.635937511920929, | |
| "rewards/chosen": -2.7484374046325684, | |
| "rewards/margins": 0.615039050579071, | |
| "rewards/rejected": -3.362499952316284, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.9829059829059829, | |
| "grad_norm": 27.659006594737694, | |
| "learning_rate": 1.1271905424918293e-09, | |
| "logits/chosen": -1.587499976158142, | |
| "logits/rejected": -1.580468773841858, | |
| "logps/chosen": -1.107812523841858, | |
| "logps/rejected": -1.3742187023162842, | |
| "loss": 1.3842, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.6484375, | |
| "rewards/chosen": -2.7671875953674316, | |
| "rewards/margins": 0.6664062738418579, | |
| "rewards/rejected": -3.4359374046325684, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.9935897435897436, | |
| "grad_norm": 26.950711678907783, | |
| "learning_rate": 2.227220934688523e-10, | |
| "logits/chosen": -1.614843726158142, | |
| "logits/rejected": -1.5890624523162842, | |
| "logps/chosen": -1.0382812023162842, | |
| "logps/rejected": -1.350000023841858, | |
| "loss": 1.3106, | |
| "nll_loss": 0.0, | |
| "rewards/accuracies": 0.660937488079071, | |
| "rewards/chosen": -2.598437547683716, | |
| "rewards/margins": 0.7733398675918579, | |
| "rewards/rejected": -3.3734374046325684, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 468, | |
| "total_flos": 0.0, | |
| "train_loss": 1.4527900891426282, | |
| "train_runtime": 3187.3315, | |
| "train_samples_per_second": 18.786, | |
| "train_steps_per_second": 0.147 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 468, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 1000000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |