{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 1319,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.000758150113722517,
"grad_norm": 210.55908519352158,
"learning_rate": 0.0,
"logits/chosen": 0.103515625,
"logits/rejected": -0.091796875,
"logps/chosen": -268.0,
"logps/rejected": -424.0,
"loss": 0.6914,
"nll_loss": 0.7265625,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.0075815011372251705,
"grad_norm": 241.994037571191,
"learning_rate": 3.4090909090909086e-08,
"logits/chosen": 0.0464274100959301,
"logits/rejected": -0.0338541679084301,
"logps/chosen": -718.888916015625,
"logps/rejected": -731.7777709960938,
"loss": 0.6853,
"nll_loss": 1.5737847089767456,
"rewards/accuracies": 0.2916666567325592,
"rewards/chosen": -0.0027330187149345875,
"rewards/margins": 0.0465325266122818,
"rewards/rejected": -0.0492689348757267,
"step": 10
},
{
"epoch": 0.015163002274450341,
"grad_norm": 219.94111891737467,
"learning_rate": 7.196969696969697e-08,
"logits/chosen": 0.04271240159869194,
"logits/rejected": -0.11677245795726776,
"logps/chosen": -529.0,
"logps/rejected": -526.5999755859375,
"loss": 0.7042,
"nll_loss": 1.3855469226837158,
"rewards/accuracies": 0.25,
"rewards/chosen": -0.0008987426990643144,
"rewards/margins": -0.004055785946547985,
"rewards/rejected": 0.00311279296875,
"step": 20
},
{
"epoch": 0.022744503411675512,
"grad_norm": 226.72439288143673,
"learning_rate": 1.0984848484848484e-07,
"logits/chosen": 0.06171875074505806,
"logits/rejected": -0.10219726711511612,
"logps/chosen": -562.0,
"logps/rejected": -548.7999877929688,
"loss": 0.7261,
"nll_loss": 1.4132812023162842,
"rewards/accuracies": 0.15000000596046448,
"rewards/chosen": -0.02811889722943306,
"rewards/margins": -0.05191650241613388,
"rewards/rejected": 0.02377929724752903,
"step": 30
},
{
"epoch": 0.030326004548900682,
"grad_norm": 278.9678714919932,
"learning_rate": 1.4772727272727272e-07,
"logits/chosen": 0.09233398735523224,
"logits/rejected": 0.05045165866613388,
"logps/chosen": -507.6000061035156,
"logps/rejected": -500.8999938964844,
"loss": 0.6926,
"nll_loss": 1.4015624523162842,
"rewards/accuracies": 0.22499999403953552,
"rewards/chosen": 0.003143310546875,
"rewards/margins": 0.0072265625931322575,
"rewards/rejected": -0.0040679932571947575,
"step": 40
},
{
"epoch": 0.03790750568612585,
"grad_norm": 290.4234528231276,
"learning_rate": 1.856060606060606e-07,
"logits/chosen": 0.07624511420726776,
"logits/rejected": -0.01950683631002903,
"logps/chosen": -615.5999755859375,
"logps/rejected": -609.0,
"loss": 0.7079,
"nll_loss": 1.257421851158142,
"rewards/accuracies": 0.3125,
"rewards/chosen": -0.02750244177877903,
"rewards/margins": -0.009429931640625,
"rewards/rejected": -0.018157958984375,
"step": 50
},
{
"epoch": 0.045489006823351025,
"grad_norm": 188.43845642237048,
"learning_rate": 2.2348484848484846e-07,
"logits/chosen": 0.04985351487994194,
"logits/rejected": -0.03656005859375,
"logps/chosen": -567.7999877929688,
"logps/rejected": -537.0,
"loss": 0.6684,
"nll_loss": 1.212499976158142,
"rewards/accuracies": 0.3125,
"rewards/chosen": 0.02692871168255806,
"rewards/margins": 0.07955016940832138,
"rewards/rejected": -0.05264892429113388,
"step": 60
},
{
"epoch": 0.05307050796057619,
"grad_norm": 232.64144162428008,
"learning_rate": 2.6136363636363634e-07,
"logits/chosen": 0.11154785007238388,
"logits/rejected": -0.06640625,
"logps/chosen": -487.0,
"logps/rejected": -482.3999938964844,
"loss": 0.65,
"nll_loss": 1.366796851158142,
"rewards/accuracies": 0.5625,
"rewards/chosen": 0.10163573920726776,
"rewards/margins": 0.10463867336511612,
"rewards/rejected": -0.0027893066871911287,
"step": 70
},
{
"epoch": 0.060652009097801364,
"grad_norm": 192.2740828794497,
"learning_rate": 2.9924242424242425e-07,
"logits/chosen": 0.01751098595559597,
"logits/rejected": -0.09003905951976776,
"logps/chosen": -620.7999877929688,
"logps/rejected": -595.5999755859375,
"loss": 0.6192,
"nll_loss": 1.321874976158142,
"rewards/accuracies": 0.5625,
"rewards/chosen": 0.17548827826976776,
"rewards/margins": 0.23996582627296448,
"rewards/rejected": -0.06424560397863388,
"step": 80
},
{
"epoch": 0.06823351023502654,
"grad_norm": 146.481409622457,
"learning_rate": 3.371212121212121e-07,
"logits/chosen": 0.14125975966453552,
"logits/rejected": -0.03948974609375,
"logps/chosen": -443.20001220703125,
"logps/rejected": -513.7999877929688,
"loss": 0.5497,
"nll_loss": 1.1339843273162842,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.3358398377895355,
"rewards/margins": 0.4330078065395355,
"rewards/rejected": -0.09690551459789276,
"step": 90
},
{
"epoch": 0.0758150113722517,
"grad_norm": 227.90269927612718,
"learning_rate": 3.75e-07,
"logits/chosen": 0.15236815810203552,
"logits/rejected": -0.06716308742761612,
"logps/chosen": -483.20001220703125,
"logps/rejected": -490.3999938964844,
"loss": 0.5727,
"nll_loss": 1.2472655773162842,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": 0.44428712129592896,
"rewards/margins": 0.4510253965854645,
"rewards/rejected": -0.006976318545639515,
"step": 100
},
{
"epoch": 0.08339651250947688,
"grad_norm": 156.4379588057543,
"learning_rate": 4.1287878787878786e-07,
"logits/chosen": 0.05620117112994194,
"logits/rejected": -0.04985351487994194,
"logps/chosen": -581.2000122070312,
"logps/rejected": -519.2000122070312,
"loss": 0.4982,
"nll_loss": 1.1378905773162842,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.47028809785842896,
"rewards/margins": 0.6709960699081421,
"rewards/rejected": -0.20065002143383026,
"step": 110
},
{
"epoch": 0.09097801364670205,
"grad_norm": 105.91864437535659,
"learning_rate": 4.507575757575757e-07,
"logits/chosen": 0.11386718600988388,
"logits/rejected": -0.05363769456744194,
"logps/chosen": -446.6000061035156,
"logps/rejected": -519.5999755859375,
"loss": 0.5331,
"nll_loss": 1.158203125,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.4749999940395355,
"rewards/margins": 0.6854003667831421,
"rewards/rejected": -0.21049194037914276,
"step": 120
},
{
"epoch": 0.09855951478392722,
"grad_norm": 163.62052291292468,
"learning_rate": 4.886363636363636e-07,
"logits/chosen": 0.04210205003619194,
"logits/rejected": -0.01055908203125,
"logps/chosen": -513.2000122070312,
"logps/rejected": -441.79998779296875,
"loss": 0.5658,
"nll_loss": 1.200781226158142,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 0.21035155653953552,
"rewards/margins": 0.517773449420929,
"rewards/rejected": -0.3070312440395355,
"step": 130
},
{
"epoch": 0.10614101592115238,
"grad_norm": 99.07353233355678,
"learning_rate": 4.970513900589722e-07,
"logits/chosen": 0.02568359300494194,
"logits/rejected": -0.09825439751148224,
"logps/chosen": -674.0,
"logps/rejected": -668.0,
"loss": 0.5372,
"nll_loss": 1.2999999523162842,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.5035644769668579,
"rewards/margins": 0.765917956829071,
"rewards/rejected": -0.2628173828125,
"step": 140
},
{
"epoch": 0.11372251705837756,
"grad_norm": 202.594954494209,
"learning_rate": 4.928390901432181e-07,
"logits/chosen": 0.137481689453125,
"logits/rejected": 0.03671874850988388,
"logps/chosen": -447.0,
"logps/rejected": -395.79998779296875,
"loss": 0.5368,
"nll_loss": 1.240625023841858,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 0.985156238079071,
"rewards/margins": 0.844433605670929,
"rewards/rejected": 0.13916015625,
"step": 150
},
{
"epoch": 0.12130401819560273,
"grad_norm": 126.84406592888543,
"learning_rate": 4.886267902274642e-07,
"logits/chosen": 0.081298828125,
"logits/rejected": -0.04389648512005806,
"logps/chosen": -450.0,
"logps/rejected": -492.0,
"loss": 0.5079,
"nll_loss": 1.365625023841858,
"rewards/accuracies": 0.6875,
"rewards/chosen": 1.095312476158142,
"rewards/margins": 0.84423828125,
"rewards/rejected": 0.25,
"step": 160
},
{
"epoch": 0.1288855193328279,
"grad_norm": 151.66256652761416,
"learning_rate": 4.844144903117102e-07,
"logits/chosen": 0.11357422173023224,
"logits/rejected": -0.02805175818502903,
"logps/chosen": -413.79998779296875,
"logps/rejected": -412.79998779296875,
"loss": 0.4046,
"nll_loss": 1.167578101158142,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": 1.037500023841858,
"rewards/margins": 1.275781273841858,
"rewards/rejected": -0.23802490532398224,
"step": 170
},
{
"epoch": 0.13646702047005307,
"grad_norm": 159.18227673830674,
"learning_rate": 4.802021903959561e-07,
"logits/chosen": 0.05300293117761612,
"logits/rejected": -0.04742431640625,
"logps/chosen": -607.7999877929688,
"logps/rejected": -567.0,
"loss": 0.4766,
"nll_loss": 1.204687476158142,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.9892578125,
"rewards/margins": 1.1962890625,
"rewards/rejected": -0.20742186903953552,
"step": 180
},
{
"epoch": 0.14404852160727824,
"grad_norm": 81.59362392185547,
"learning_rate": 4.759898904802022e-07,
"logits/chosen": 0.11760254204273224,
"logits/rejected": -0.06528320163488388,
"logps/chosen": -487.79998779296875,
"logps/rejected": -446.6000061035156,
"loss": 0.5373,
"nll_loss": 1.3742187023162842,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 0.6246887445449829,
"rewards/margins": 1.1572265625,
"rewards/rejected": -0.5337585210800171,
"step": 190
},
{
"epoch": 0.1516300227445034,
"grad_norm": 247.23828828724723,
"learning_rate": 4.7177759056444814e-07,
"logits/chosen": 0.17337647080421448,
"logits/rejected": 0.041778564453125,
"logps/chosen": -546.2000122070312,
"logps/rejected": -551.4000244140625,
"loss": 0.5237,
"nll_loss": 1.338281273841858,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.5738281011581421,
"rewards/margins": 0.996289074420929,
"rewards/rejected": -0.4232421815395355,
"step": 200
},
{
"epoch": 0.15921152388172857,
"grad_norm": 177.0807309921808,
"learning_rate": 4.6756529064869416e-07,
"logits/chosen": 0.1591796875,
"logits/rejected": 0.11337890475988388,
"logps/chosen": -491.70001220703125,
"logps/rejected": -469.6000061035156,
"loss": 0.4459,
"nll_loss": 1.226171851158142,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 1.101953148841858,
"rewards/margins": 1.295312523841858,
"rewards/rejected": -0.19340820610523224,
"step": 210
},
{
"epoch": 0.16679302501895377,
"grad_norm": 140.05095261979244,
"learning_rate": 4.633529907329402e-07,
"logits/chosen": 0.09412841498851776,
"logits/rejected": -0.07822265475988388,
"logps/chosen": -696.7999877929688,
"logps/rejected": -695.2000122070312,
"loss": 0.4674,
"nll_loss": 1.373046875,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": 1.043554663658142,
"rewards/margins": 1.4679687023162842,
"rewards/rejected": -0.42412108182907104,
"step": 220
},
{
"epoch": 0.17437452615617893,
"grad_norm": 143.39089630193985,
"learning_rate": 4.5914069081718614e-07,
"logits/chosen": 0.15617676079273224,
"logits/rejected": 0.0377197265625,
"logps/chosen": -533.5999755859375,
"logps/rejected": -573.5999755859375,
"loss": 0.4671,
"nll_loss": 1.2648437023162842,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 0.642871081829071,
"rewards/margins": 1.3449218273162842,
"rewards/rejected": -0.7015625238418579,
"step": 230
},
{
"epoch": 0.1819560272934041,
"grad_norm": 174.53324433123169,
"learning_rate": 4.5492839090143215e-07,
"logits/chosen": 0.19760742783546448,
"logits/rejected": 0.0694580078125,
"logps/chosen": -479.0,
"logps/rejected": -460.79998779296875,
"loss": 0.5019,
"nll_loss": 1.2683594226837158,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 0.4159912168979645,
"rewards/margins": 1.125390648841858,
"rewards/rejected": -0.709277331829071,
"step": 240
},
{
"epoch": 0.18953752843062927,
"grad_norm": 96.48112981479892,
"learning_rate": 4.5071609098567817e-07,
"logits/chosen": 0.11848144233226776,
"logits/rejected": 0.06085815280675888,
"logps/chosen": -738.5999755859375,
"logps/rejected": -651.7999877929688,
"loss": 0.4883,
"nll_loss": 1.408593773841858,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 0.83154296875,
"rewards/margins": 1.3964354991912842,
"rewards/rejected": -0.5653320550918579,
"step": 250
},
{
"epoch": 0.19711902956785443,
"grad_norm": 342.616952137923,
"learning_rate": 4.4650379106992413e-07,
"logits/chosen": 0.1614990234375,
"logits/rejected": 0.04248046875,
"logps/chosen": -526.4000244140625,
"logps/rejected": -575.4000244140625,
"loss": 0.5252,
"nll_loss": 1.343359351158142,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": 1.203125,
"rewards/margins": 1.326440453529358,
"rewards/rejected": -0.12354736030101776,
"step": 260
},
{
"epoch": 0.2047005307050796,
"grad_norm": 65.27787243249955,
"learning_rate": 4.4229149115417014e-07,
"logits/chosen": 0.17680664360523224,
"logits/rejected": 0.04179687425494194,
"logps/chosen": -528.2000122070312,
"logps/rejected": -584.0,
"loss": 0.4598,
"nll_loss": 1.197656273841858,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": 1.369140625,
"rewards/margins": 1.4425780773162842,
"rewards/rejected": -0.07305908203125,
"step": 270
},
{
"epoch": 0.21228203184230476,
"grad_norm": 201.96030634728163,
"learning_rate": 4.3807919123841616e-07,
"logits/chosen": 0.2735351622104645,
"logits/rejected": 0.13618774712085724,
"logps/chosen": -413.1000061035156,
"logps/rejected": -387.8999938964844,
"loss": 0.4424,
"nll_loss": 1.1570312976837158,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 1.322656273841858,
"rewards/margins": 1.4929687976837158,
"rewards/rejected": -0.17158202826976776,
"step": 280
},
{
"epoch": 0.21986353297952996,
"grad_norm": 244.51972827175538,
"learning_rate": 4.338668913226621e-07,
"logits/chosen": 0.17041015625,
"logits/rejected": 0.04808349534869194,
"logps/chosen": -576.2000122070312,
"logps/rejected": -552.7999877929688,
"loss": 0.4339,
"nll_loss": 1.224218726158142,
"rewards/accuracies": 0.7875000238418579,
"rewards/chosen": 1.2804687023162842,
"rewards/margins": 1.711328148841858,
"rewards/rejected": -0.4312988221645355,
"step": 290
},
{
"epoch": 0.22744503411675512,
"grad_norm": 160.90356259462718,
"learning_rate": 4.2965459140690813e-07,
"logits/chosen": 0.14516600966453552,
"logits/rejected": 0.06256103515625,
"logps/chosen": -564.4000244140625,
"logps/rejected": -541.4000244140625,
"loss": 0.416,
"nll_loss": 1.3507812023162842,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 1.1989257335662842,
"rewards/margins": 1.465234398841858,
"rewards/rejected": -0.2666015625,
"step": 300
},
{
"epoch": 0.2350265352539803,
"grad_norm": 122.92067457976098,
"learning_rate": 4.2544229149115415e-07,
"logits/chosen": 0.16321411728858948,
"logits/rejected": 0.02617187425494194,
"logps/chosen": -500.6000061035156,
"logps/rejected": -510.20001220703125,
"loss": 0.5279,
"nll_loss": 1.127343773841858,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 1.3679687976837158,
"rewards/margins": 1.8019530773162842,
"rewards/rejected": -0.43256837129592896,
"step": 310
},
{
"epoch": 0.24260803639120546,
"grad_norm": 111.42348359831298,
"learning_rate": 4.212299915754001e-07,
"logits/chosen": 0.20351561903953552,
"logits/rejected": 0.10941161960363388,
"logps/chosen": -626.2000122070312,
"logps/rejected": -613.7999877929688,
"loss": 0.4295,
"nll_loss": 1.22265625,
"rewards/accuracies": 0.75,
"rewards/chosen": 0.866992175579071,
"rewards/margins": 1.7000000476837158,
"rewards/rejected": -0.8323730230331421,
"step": 320
},
{
"epoch": 0.25018953752843065,
"grad_norm": 95.27457125391763,
"learning_rate": 4.170176916596461e-07,
"logits/chosen": 0.26093751192092896,
"logits/rejected": 0.1373291015625,
"logps/chosen": -444.0,
"logps/rejected": -464.0,
"loss": 0.383,
"nll_loss": 1.2921874523162842,
"rewards/accuracies": 0.7875000238418579,
"rewards/chosen": 1.0666992664337158,
"rewards/margins": 1.975000023841858,
"rewards/rejected": -0.9068603515625,
"step": 330
},
{
"epoch": 0.2577710386656558,
"grad_norm": 138.3345438826051,
"learning_rate": 4.128053917438922e-07,
"logits/chosen": 0.2812744081020355,
"logits/rejected": 0.12603759765625,
"logps/chosen": -425.79998779296875,
"logps/rejected": -443.79998779296875,
"loss": 0.3626,
"nll_loss": 1.155859351158142,
"rewards/accuracies": 0.8125,
"rewards/chosen": 0.916552722454071,
"rewards/margins": 2.116406202316284,
"rewards/rejected": -1.199462890625,
"step": 340
},
{
"epoch": 0.265352539802881,
"grad_norm": 248.2578962865936,
"learning_rate": 4.0859309182813815e-07,
"logits/chosen": 0.19716186821460724,
"logits/rejected": -0.0211181640625,
"logps/chosen": -702.5999755859375,
"logps/rejected": -697.4000244140625,
"loss": 0.4761,
"nll_loss": 1.3152344226837158,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 1.0244140625,
"rewards/margins": 1.7078125476837158,
"rewards/rejected": -0.683398425579071,
"step": 350
},
{
"epoch": 0.27293404094010615,
"grad_norm": 165.199706750475,
"learning_rate": 4.0438079191238417e-07,
"logits/chosen": 0.20786742866039276,
"logits/rejected": 0.07772216945886612,
"logps/chosen": -603.0,
"logps/rejected": -621.4000244140625,
"loss": 0.4578,
"nll_loss": 1.168359398841858,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": 1.0081055164337158,
"rewards/margins": 1.6279296875,
"rewards/rejected": -0.62451171875,
"step": 360
},
{
"epoch": 0.2805155420773313,
"grad_norm": 121.96006947274803,
"learning_rate": 4.001684919966302e-07,
"logits/chosen": 0.2735351622104645,
"logits/rejected": 0.09089355170726776,
"logps/chosen": -439.0,
"logps/rejected": -460.20001220703125,
"loss": 0.3829,
"nll_loss": 1.330468773841858,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 0.996289074420929,
"rewards/margins": 1.541015625,
"rewards/rejected": -0.5445801019668579,
"step": 370
},
{
"epoch": 0.2880970432145565,
"grad_norm": 182.5442973440352,
"learning_rate": 3.9595619208087615e-07,
"logits/chosen": 0.1871185302734375,
"logits/rejected": 0.02005615271627903,
"logps/chosen": -427.0,
"logps/rejected": -429.79998779296875,
"loss": 0.3873,
"nll_loss": 1.224218726158142,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": 1.036474585533142,
"rewards/margins": 2.098437547683716,
"rewards/rejected": -1.060937523841858,
"step": 380
},
{
"epoch": 0.29567854435178165,
"grad_norm": 265.42006162215375,
"learning_rate": 3.9174389216512216e-07,
"logits/chosen": 0.12451171875,
"logits/rejected": -0.06752929836511612,
"logps/chosen": -522.5999755859375,
"logps/rejected": -547.0999755859375,
"loss": 0.3694,
"nll_loss": 1.4851562976837158,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": 0.9535156488418579,
"rewards/margins": 2.038378953933716,
"rewards/rejected": -1.085546851158142,
"step": 390
},
{
"epoch": 0.3032600454890068,
"grad_norm": 121.52240610984677,
"learning_rate": 3.875315922493682e-07,
"logits/chosen": 0.21596679091453552,
"logits/rejected": 0.08803711086511612,
"logps/chosen": -522.2000122070312,
"logps/rejected": -518.4000244140625,
"loss": 0.3509,
"nll_loss": 1.23828125,
"rewards/accuracies": 0.8125,
"rewards/chosen": 1.262304663658142,
"rewards/margins": 2.3251953125,
"rewards/rejected": -1.0622069835662842,
"step": 400
},
{
"epoch": 0.310841546626232,
"grad_norm": 198.63570140647747,
"learning_rate": 3.8331929233361414e-07,
"logits/chosen": 0.26396483182907104,
"logits/rejected": 0.04107666015625,
"logps/chosen": -541.0,
"logps/rejected": -614.5999755859375,
"loss": 0.4102,
"nll_loss": 1.206640601158142,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 0.7144531011581421,
"rewards/margins": 1.920312523841858,
"rewards/rejected": -1.2052490711212158,
"step": 410
},
{
"epoch": 0.31842304776345715,
"grad_norm": 124.7572195361246,
"learning_rate": 3.7910699241786015e-07,
"logits/chosen": 0.12045898288488388,
"logits/rejected": 0.06239013746380806,
"logps/chosen": -647.5999755859375,
"logps/rejected": -664.7999877929688,
"loss": 0.4872,
"nll_loss": 1.5203125476837158,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.3798828125,
"rewards/margins": 1.7646484375,
"rewards/rejected": -1.384179711341858,
"step": 420
},
{
"epoch": 0.3260045489006823,
"grad_norm": 303.88739862771314,
"learning_rate": 3.7489469250210617e-07,
"logits/chosen": 0.13037109375,
"logits/rejected": -0.02709350548684597,
"logps/chosen": -591.0,
"logps/rejected": -558.7999877929688,
"loss": 0.4929,
"nll_loss": 1.4070312976837158,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 0.7420898675918579,
"rewards/margins": 1.9757812023162842,
"rewards/rejected": -1.232421875,
"step": 430
},
{
"epoch": 0.33358605003790753,
"grad_norm": 339.23070637711777,
"learning_rate": 3.7068239258635213e-07,
"logits/chosen": 0.24921874701976776,
"logits/rejected": 0.11030273139476776,
"logps/chosen": -608.0999755859375,
"logps/rejected": -606.5999755859375,
"loss": 0.6025,
"nll_loss": 1.1183593273162842,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": 0.6957031488418579,
"rewards/margins": 1.5625,
"rewards/rejected": -0.8663085699081421,
"step": 440
},
{
"epoch": 0.3411675511751327,
"grad_norm": 196.8785396662436,
"learning_rate": 3.6647009267059814e-07,
"logits/chosen": 0.15043945610523224,
"logits/rejected": 0.01263427734375,
"logps/chosen": -525.4000244140625,
"logps/rejected": -506.79998779296875,
"loss": 0.3875,
"nll_loss": 1.2882812023162842,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": 0.885937511920929,
"rewards/margins": 2.108203172683716,
"rewards/rejected": -1.2228515148162842,
"step": 450
},
{
"epoch": 0.34874905231235787,
"grad_norm": 129.86600239128992,
"learning_rate": 3.6225779275484416e-07,
"logits/chosen": 0.09770508110523224,
"logits/rejected": -0.0045104981400072575,
"logps/chosen": -568.5999755859375,
"logps/rejected": -582.5999755859375,
"loss": 0.3584,
"nll_loss": 1.3078124523162842,
"rewards/accuracies": 0.8374999761581421,
"rewards/chosen": 0.759228527545929,
"rewards/margins": 2.2353515625,
"rewards/rejected": -1.474609375,
"step": 460
},
{
"epoch": 0.35633055344958303,
"grad_norm": 198.9132732417745,
"learning_rate": 3.580454928390901e-07,
"logits/chosen": 0.2828125059604645,
"logits/rejected": 0.06529541313648224,
"logps/chosen": -569.5999755859375,
"logps/rejected": -673.0,
"loss": 1.691,
"nll_loss": 1.230078101158142,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": 1.353906273841858,
"rewards/margins": 0.9765625,
"rewards/rejected": 0.3705078065395355,
"step": 470
},
{
"epoch": 0.3639120545868082,
"grad_norm": 162.93774383329688,
"learning_rate": 3.5383319292333613e-07,
"logits/chosen": 0.22368164360523224,
"logits/rejected": 0.13312987983226776,
"logps/chosen": -472.20001220703125,
"logps/rejected": -433.20001220703125,
"loss": 0.3808,
"nll_loss": 1.1730468273162842,
"rewards/accuracies": 0.8125,
"rewards/chosen": 1.345703125,
"rewards/margins": 1.978906273841858,
"rewards/rejected": -0.63232421875,
"step": 480
},
{
"epoch": 0.37149355572403336,
"grad_norm": 314.69450736010833,
"learning_rate": 3.4962089300758215e-07,
"logits/chosen": 0.19184570014476776,
"logits/rejected": 0.01174316368997097,
"logps/chosen": -516.5999755859375,
"logps/rejected": -530.0,
"loss": 0.5622,
"nll_loss": 1.2273437976837158,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 1.878515601158142,
"rewards/margins": 1.8523437976837158,
"rewards/rejected": 0.02460937574505806,
"step": 490
},
{
"epoch": 0.37907505686125853,
"grad_norm": 127.11791996065408,
"learning_rate": 3.454085930918281e-07,
"logits/chosen": 0.32811278104782104,
"logits/rejected": 0.15139159560203552,
"logps/chosen": -514.0,
"logps/rejected": -559.5999755859375,
"loss": 0.4157,
"nll_loss": 1.1749999523162842,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 1.127539038658142,
"rewards/margins": 1.6062500476837158,
"rewards/rejected": -0.4796142578125,
"step": 500
},
{
"epoch": 0.3866565579984837,
"grad_norm": 110.73418365791827,
"learning_rate": 3.411962931760741e-07,
"logits/chosen": 0.16527099907398224,
"logits/rejected": -0.002532958984375,
"logps/chosen": -615.5999755859375,
"logps/rejected": -609.4000244140625,
"loss": 0.3923,
"nll_loss": 1.234375,
"rewards/accuracies": 0.75,
"rewards/chosen": 1.1682708263397217,
"rewards/margins": 1.9921875,
"rewards/rejected": -0.822509765625,
"step": 510
},
{
"epoch": 0.39423805913570886,
"grad_norm": 113.57088735550161,
"learning_rate": 3.3698399326032014e-07,
"logits/chosen": 0.13564452528953552,
"logits/rejected": -0.02927246131002903,
"logps/chosen": -630.9000244140625,
"logps/rejected": -627.5999755859375,
"loss": 0.3565,
"nll_loss": 1.330468773841858,
"rewards/accuracies": 0.75,
"rewards/chosen": 0.7025390863418579,
"rewards/margins": 2.2406249046325684,
"rewards/rejected": -1.537500023841858,
"step": 520
},
{
"epoch": 0.40181956027293403,
"grad_norm": 114.88993123662492,
"learning_rate": 3.327716933445661e-07,
"logits/chosen": 0.2532714903354645,
"logits/rejected": 0.14990234375,
"logps/chosen": -532.2000122070312,
"logps/rejected": -546.4000244140625,
"loss": 0.3645,
"nll_loss": 1.03125,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": 0.9659668207168579,
"rewards/margins": 2.059375047683716,
"rewards/rejected": -1.091894507408142,
"step": 530
},
{
"epoch": 0.4094010614101592,
"grad_norm": 140.5881761703118,
"learning_rate": 3.285593934288121e-07,
"logits/chosen": 0.24760742485523224,
"logits/rejected": 0.0767822265625,
"logps/chosen": -570.0,
"logps/rejected": -560.9000244140625,
"loss": 0.3599,
"nll_loss": 1.072656273841858,
"rewards/accuracies": 0.8125,
"rewards/chosen": 1.642431616783142,
"rewards/margins": 2.5054688453674316,
"rewards/rejected": -0.8681640625,
"step": 540
},
{
"epoch": 0.41698256254738436,
"grad_norm": 124.0882733071695,
"learning_rate": 3.2434709351305813e-07,
"logits/chosen": 0.19780273735523224,
"logits/rejected": 0.09107665717601776,
"logps/chosen": -652.7999877929688,
"logps/rejected": -671.4000244140625,
"loss": 0.4856,
"nll_loss": 1.3191406726837158,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 1.3234374523162842,
"rewards/margins": 1.916015625,
"rewards/rejected": -0.5931396484375,
"step": 550
},
{
"epoch": 0.4245640636846095,
"grad_norm": 72.26271440273402,
"learning_rate": 3.201347935973041e-07,
"logits/chosen": 0.16713866591453552,
"logits/rejected": 0.04147949069738388,
"logps/chosen": -532.2000122070312,
"logps/rejected": -541.0,
"loss": 0.3617,
"nll_loss": 1.1613280773162842,
"rewards/accuracies": 0.8125,
"rewards/chosen": 1.2734375,
"rewards/margins": 2.1117186546325684,
"rewards/rejected": -0.837597668170929,
"step": 560
},
{
"epoch": 0.43214556482183475,
"grad_norm": 150.34076527214728,
"learning_rate": 3.159224936815501e-07,
"logits/chosen": 0.23625488579273224,
"logits/rejected": 0.05903320387005806,
"logps/chosen": -489.79998779296875,
"logps/rejected": -441.5,
"loss": 0.4574,
"nll_loss": 1.159765601158142,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 1.4072265625,
"rewards/margins": 1.7890625,
"rewards/rejected": -0.3818359375,
"step": 570
},
{
"epoch": 0.4397270659590599,
"grad_norm": 153.1693060134136,
"learning_rate": 3.117101937657961e-07,
"logits/chosen": 0.23941650986671448,
"logits/rejected": 0.04136962816119194,
"logps/chosen": -540.2000122070312,
"logps/rejected": -460.0,
"loss": 0.3712,
"nll_loss": 1.142187476158142,
"rewards/accuracies": 0.75,
"rewards/chosen": 1.1106445789337158,
"rewards/margins": 2.3414063453674316,
"rewards/rejected": -1.228906273841858,
"step": 580
},
{
"epoch": 0.4473085670962851,
"grad_norm": 194.82248152259902,
"learning_rate": 3.074978938500421e-07,
"logits/chosen": 0.17299804091453552,
"logits/rejected": 0.06650390475988388,
"logps/chosen": -717.0,
"logps/rejected": -706.2000122070312,
"loss": 0.4601,
"nll_loss": 1.144921898841858,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 1.5226562023162842,
"rewards/margins": 1.931640625,
"rewards/rejected": -0.41069334745407104,
"step": 590
},
{
"epoch": 0.45489006823351025,
"grad_norm": 130.26234365994824,
"learning_rate": 3.032855939342881e-07,
"logits/chosen": 0.2310791015625,
"logits/rejected": 0.10465087741613388,
"logps/chosen": -544.4000244140625,
"logps/rejected": -530.7999877929688,
"loss": 0.3822,
"nll_loss": 1.168359398841858,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": 1.578125,
"rewards/margins": 2.103515625,
"rewards/rejected": -0.527148425579071,
"step": 600
},
{
"epoch": 0.4624715693707354,
"grad_norm": 771.0753113803045,
"learning_rate": 2.990732940185341e-07,
"logits/chosen": 0.26826173067092896,
"logits/rejected": 0.116119384765625,
"logps/chosen": -585.2000122070312,
"logps/rejected": -631.0,
"loss": 0.5163,
"nll_loss": 1.3234374523162842,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 1.073632836341858,
"rewards/margins": 1.968359351158142,
"rewards/rejected": -0.895214855670929,
"step": 610
},
{
"epoch": 0.4700530705079606,
"grad_norm": 146.97598084251982,
"learning_rate": 2.948609941027801e-07,
"logits/chosen": 0.03544921800494194,
"logits/rejected": -0.01901855506002903,
"logps/chosen": -692.5999755859375,
"logps/rejected": -693.5999755859375,
"loss": 0.3872,
"nll_loss": 1.3894531726837158,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 1.142578125,
"rewards/margins": 2.372265577316284,
"rewards/rejected": -1.22998046875,
"step": 620
},
{
"epoch": 0.47763457164518575,
"grad_norm": 231.26312952989144,
"learning_rate": 2.906486941870261e-07,
"logits/chosen": 0.11240234225988388,
"logits/rejected": 0.06850586086511612,
"logps/chosen": -679.5999755859375,
"logps/rejected": -561.5999755859375,
"loss": 0.4133,
"nll_loss": 1.369531273841858,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": 1.1198608875274658,
"rewards/margins": 2.32421875,
"rewards/rejected": -1.2004883289337158,
"step": 630
},
{
"epoch": 0.4852160727824109,
"grad_norm": 228.98492770996447,
"learning_rate": 2.864363942712721e-07,
"logits/chosen": 0.12601622939109802,
"logits/rejected": 0.012011718936264515,
"logps/chosen": -680.2000122070312,
"logps/rejected": -615.4000244140625,
"loss": 0.5076,
"nll_loss": 1.203515648841858,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 1.0344727039337158,
"rewards/margins": 1.968359351158142,
"rewards/rejected": -0.933398425579071,
"step": 640
},
{
"epoch": 0.4927975739196361,
"grad_norm": 101.98305639734917,
"learning_rate": 2.8222409435551807e-07,
"logits/chosen": 0.24750976264476776,
"logits/rejected": 0.10369262844324112,
"logps/chosen": -527.2000122070312,
"logps/rejected": -538.0,
"loss": 0.5104,
"nll_loss": 1.207421898841858,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": 0.8055664300918579,
"rewards/margins": 1.642187476158142,
"rewards/rejected": -0.8370116949081421,
"step": 650
},
{
"epoch": 0.5003790750568613,
"grad_norm": 86.66361253377694,
"learning_rate": 2.780117944397641e-07,
"logits/chosen": 0.23603515326976776,
"logits/rejected": 0.10183105617761612,
"logps/chosen": -427.79998779296875,
"logps/rejected": -430.79998779296875,
"loss": 0.3578,
"nll_loss": 1.2980468273162842,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": 1.646484375,
"rewards/margins": 2.233593702316284,
"rewards/rejected": -0.5870116949081421,
"step": 660
},
{
"epoch": 0.5079605761940864,
"grad_norm": 154.46017727412266,
"learning_rate": 2.737994945240101e-07,
"logits/chosen": 0.24692383408546448,
"logits/rejected": 0.05348510667681694,
"logps/chosen": -557.7999877929688,
"logps/rejected": -567.0,
"loss": 0.3512,
"nll_loss": 1.154687523841858,
"rewards/accuracies": 0.75,
"rewards/chosen": 2.2105469703674316,
"rewards/margins": 2.391406297683716,
"rewards/rejected": -0.18050536513328552,
"step": 670
},
{
"epoch": 0.5155420773313116,
"grad_norm": 269.5164271871116,
"learning_rate": 2.6958719460825606e-07,
"logits/chosen": 0.18512573838233948,
"logits/rejected": 0.02484130859375,
"logps/chosen": -563.5999755859375,
"logps/rejected": -601.2000122070312,
"loss": 0.4837,
"nll_loss": 1.1749999523162842,
"rewards/accuracies": 0.6875,
"rewards/chosen": 1.6671874523162842,
"rewards/margins": 1.5674316883087158,
"rewards/rejected": 0.09707031399011612,
"step": 680
},
{
"epoch": 0.5231235784685367,
"grad_norm": 260.79714861404403,
"learning_rate": 2.653748946925021e-07,
"logits/chosen": 0.17792968451976776,
"logits/rejected": 0.00091552734375,
"logps/chosen": -538.2000122070312,
"logps/rejected": -570.2000122070312,
"loss": 0.4087,
"nll_loss": 1.2804687023162842,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 1.5203125476837158,
"rewards/margins": 2.2164063453674316,
"rewards/rejected": -0.69793701171875,
"step": 690
},
{
"epoch": 0.530705079605762,
"grad_norm": 77.58328940429827,
"learning_rate": 2.611625947767481e-07,
"logits/chosen": 0.19677734375,
"logits/rejected": 0.01052246056497097,
"logps/chosen": -472.20001220703125,
"logps/rejected": -511.79998779296875,
"loss": 0.4354,
"nll_loss": 1.3624999523162842,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": 1.189550757408142,
"rewards/margins": 2.3046875,
"rewards/rejected": -1.11328125,
"step": 700
},
{
"epoch": 0.5382865807429871,
"grad_norm": 189.1893865483772,
"learning_rate": 2.5695029486099405e-07,
"logits/chosen": 0.16862793266773224,
"logits/rejected": 0.10245361179113388,
"logps/chosen": -507.0,
"logps/rejected": -519.7999877929688,
"loss": 0.5728,
"nll_loss": 1.277734398841858,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": 0.6670166254043579,
"rewards/margins": 1.80126953125,
"rewards/rejected": -1.134374976158142,
"step": 710
},
{
"epoch": 0.5458680818802123,
"grad_norm": 68.61904191897719,
"learning_rate": 2.5273799494524007e-07,
"logits/chosen": 0.25273436307907104,
"logits/rejected": 0.10590820014476776,
"logps/chosen": -478.29998779296875,
"logps/rejected": -528.2000122070312,
"loss": 0.4332,
"nll_loss": 1.21875,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 0.42304688692092896,
"rewards/margins": 2.076367139816284,
"rewards/rejected": -1.6535155773162842,
"step": 720
},
{
"epoch": 0.5534495830174374,
"grad_norm": 157.38793937778374,
"learning_rate": 2.485256950294861e-07,
"logits/chosen": 0.19511719048023224,
"logits/rejected": 0.06821288913488388,
"logps/chosen": -391.79998779296875,
"logps/rejected": -453.0,
"loss": 0.4128,
"nll_loss": 1.2273437976837158,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": 1.0950195789337158,
"rewards/margins": 1.953125,
"rewards/rejected": -0.858593761920929,
"step": 730
},
{
"epoch": 0.5610310841546626,
"grad_norm": 83.09266555988135,
"learning_rate": 2.443133951137321e-07,
"logits/chosen": 0.15876464545726776,
"logits/rejected": -0.01612548902630806,
"logps/chosen": -470.20001220703125,
"logps/rejected": -466.79998779296875,
"loss": 0.2883,
"nll_loss": 1.114843726158142,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": 1.08984375,
"rewards/margins": 2.4671874046325684,
"rewards/rejected": -1.376367211341858,
"step": 740
},
{
"epoch": 0.5686125852918877,
"grad_norm": 136.52742231810836,
"learning_rate": 2.4010109519797806e-07,
"logits/chosen": 0.24506835639476776,
"logits/rejected": -0.0029052733443677425,
"logps/chosen": -485.79998779296875,
"logps/rejected": -464.3999938964844,
"loss": 0.3478,
"nll_loss": 1.302343726158142,
"rewards/accuracies": 0.75,
"rewards/chosen": 1.229882836341858,
"rewards/margins": 2.585156202316284,
"rewards/rejected": -1.3562500476837158,
"step": 750
},
{
"epoch": 0.576194086429113,
"grad_norm": 100.82715249225713,
"learning_rate": 2.3588879528222407e-07,
"logits/chosen": 0.21192626655101776,
"logits/rejected": 0.06955566257238388,
"logps/chosen": -527.0,
"logps/rejected": -494.3999938964844,
"loss": 0.3816,
"nll_loss": 1.222265601158142,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": 1.437890648841858,
"rewards/margins": 2.26953125,
"rewards/rejected": -0.832714855670929,
"step": 760
},
{
"epoch": 0.5837755875663382,
"grad_norm": 147.57950000370568,
"learning_rate": 2.316764953664701e-07,
"logits/chosen": 0.08811035007238388,
"logits/rejected": -0.0233154296875,
"logps/chosen": -573.5999755859375,
"logps/rejected": -605.4000244140625,
"loss": 0.4853,
"nll_loss": 1.2917969226837158,
"rewards/accuracies": 0.75,
"rewards/chosen": 1.7355468273162842,
"rewards/margins": 2.079296827316284,
"rewards/rejected": -0.3466796875,
"step": 770
},
{
"epoch": 0.5913570887035633,
"grad_norm": 31.889468406083985,
"learning_rate": 2.2746419545071608e-07,
"logits/chosen": 0.11967773735523224,
"logits/rejected": 0.01263427734375,
"logps/chosen": -496.6000061035156,
"logps/rejected": -455.6000061035156,
"loss": 0.3848,
"nll_loss": 1.337890625,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": 1.70703125,
"rewards/margins": 2.2591795921325684,
"rewards/rejected": -0.55078125,
"step": 780
},
{
"epoch": 0.5989385898407885,
"grad_norm": 136.6171237288431,
"learning_rate": 2.2325189553496206e-07,
"logits/chosen": 0.23629149794578552,
"logits/rejected": 0.123291015625,
"logps/chosen": -495.3999938964844,
"logps/rejected": -505.0,
"loss": 0.3264,
"nll_loss": 1.367578148841858,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": 1.6804687976837158,
"rewards/margins": 2.444531202316284,
"rewards/rejected": -0.7638183832168579,
"step": 790
},
{
"epoch": 0.6065200909780136,
"grad_norm": 169.16069886552995,
"learning_rate": 2.1903959561920808e-07,
"logits/chosen": 0.18515625596046448,
"logits/rejected": 0.00864257849752903,
"logps/chosen": -416.20001220703125,
"logps/rejected": -472.20001220703125,
"loss": 0.3884,
"nll_loss": 1.1886718273162842,
"rewards/accuracies": 0.75,
"rewards/chosen": 1.984765648841858,
"rewards/margins": 2.346874952316284,
"rewards/rejected": -0.36259764432907104,
"step": 800
},
{
"epoch": 0.6141015921152388,
"grad_norm": 300.451641237815,
"learning_rate": 2.1482729570345407e-07,
"logits/chosen": 0.22788086533546448,
"logits/rejected": 0.08359374850988388,
"logps/chosen": -546.7999877929688,
"logps/rejected": -531.5999755859375,
"loss": 0.4231,
"nll_loss": 1.183984398841858,
"rewards/accuracies": 0.75,
"rewards/chosen": 1.3076171875,
"rewards/margins": 2.2835936546325684,
"rewards/rejected": -0.977038562297821,
"step": 810
},
{
"epoch": 0.621683093252464,
"grad_norm": 183.86035293603638,
"learning_rate": 2.1061499578770005e-07,
"logits/chosen": 0.21928711235523224,
"logits/rejected": 0.07795409858226776,
"logps/chosen": -462.79998779296875,
"logps/rejected": -428.6000061035156,
"loss": 0.3567,
"nll_loss": 1.178125023841858,
"rewards/accuracies": 0.7875000238418579,
"rewards/chosen": 1.20458984375,
"rewards/margins": 2.129687547683716,
"rewards/rejected": -0.925891101360321,
"step": 820
},
{
"epoch": 0.6292645943896892,
"grad_norm": 120.21446058772034,
"learning_rate": 2.064026958719461e-07,
"logits/chosen": 0.15007324516773224,
"logits/rejected": -0.02866210974752903,
"logps/chosen": -534.9000244140625,
"logps/rejected": -520.5999755859375,
"loss": 0.366,
"nll_loss": 1.256250023841858,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": 1.5652344226837158,
"rewards/margins": 2.6624999046325684,
"rewards/rejected": -1.100000023841858,
"step": 830
},
{
"epoch": 0.6368460955269143,
"grad_norm": 153.8126911904177,
"learning_rate": 2.0219039595619208e-07,
"logits/chosen": 0.13383789360523224,
"logits/rejected": 0.04542846605181694,
"logps/chosen": -485.20001220703125,
"logps/rejected": -536.7999877929688,
"loss": 0.4412,
"nll_loss": 1.170312523841858,
"rewards/accuracies": 0.7875000238418579,
"rewards/chosen": 1.072851538658142,
"rewards/margins": 2.020214796066284,
"rewards/rejected": -0.9491211175918579,
"step": 840
},
{
"epoch": 0.6444275966641395,
"grad_norm": 121.8986484261781,
"learning_rate": 1.9797809604043807e-07,
"logits/chosen": 0.16423949599266052,
"logits/rejected": -0.01621093787252903,
"logps/chosen": -535.5999755859375,
"logps/rejected": -534.2000122070312,
"loss": 0.4136,
"nll_loss": 1.3347656726837158,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": 1.1912109851837158,
"rewards/margins": 2.6781249046325684,
"rewards/rejected": -1.484765648841858,
"step": 850
},
{
"epoch": 0.6520090978013646,
"grad_norm": 107.32753219029611,
"learning_rate": 1.937657961246841e-07,
"logits/chosen": 0.24458007514476776,
"logits/rejected": -0.0005981445428915322,
"logps/chosen": -509.0,
"logps/rejected": -537.9000244140625,
"loss": 0.4031,
"nll_loss": 1.085546851158142,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 1.205078125,
"rewards/margins": 2.5023436546325684,
"rewards/rejected": -1.293359398841858,
"step": 860
},
{
"epoch": 0.6595905989385898,
"grad_norm": 199.56179373972668,
"learning_rate": 1.8955349620893008e-07,
"logits/chosen": 0.16865234076976776,
"logits/rejected": 0.01300659216940403,
"logps/chosen": -501.3999938964844,
"logps/rejected": -553.7999877929688,
"loss": 0.5141,
"nll_loss": 1.271875023841858,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 1.2307617664337158,
"rewards/margins": 2.220898389816284,
"rewards/rejected": -0.989794909954071,
"step": 870
},
{
"epoch": 0.6671721000758151,
"grad_norm": 145.67471682071726,
"learning_rate": 1.8534119629317606e-07,
"logits/chosen": 0.14238281548023224,
"logits/rejected": 0.04350585862994194,
"logps/chosen": -609.0,
"logps/rejected": -608.7999877929688,
"loss": 0.4773,
"nll_loss": 1.243749976158142,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 1.129370093345642,
"rewards/margins": 1.8621094226837158,
"rewards/rejected": -0.7308593988418579,
"step": 880
},
{
"epoch": 0.6747536012130402,
"grad_norm": 196.73193775310398,
"learning_rate": 1.8112889637742208e-07,
"logits/chosen": 0.04483642429113388,
"logits/rejected": 0.0048583983443677425,
"logps/chosen": -693.7999877929688,
"logps/rejected": -666.5999755859375,
"loss": 0.6123,
"nll_loss": 1.2527344226837158,
"rewards/accuracies": 0.6875,
"rewards/chosen": 1.357812523841858,
"rewards/margins": 1.69140625,
"rewards/rejected": -0.33369141817092896,
"step": 890
},
{
"epoch": 0.6823351023502654,
"grad_norm": 194.77536214548843,
"learning_rate": 1.7691659646166807e-07,
"logits/chosen": 0.14555664360523224,
"logits/rejected": 0.04428710788488388,
"logps/chosen": -546.4000244140625,
"logps/rejected": -503.20001220703125,
"loss": 0.4322,
"nll_loss": 1.318750023841858,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 1.5364258289337158,
"rewards/margins": 2.186328172683716,
"rewards/rejected": -0.6504882574081421,
"step": 900
},
{
"epoch": 0.6899166034874905,
"grad_norm": 175.15629650260274,
"learning_rate": 1.7270429654591406e-07,
"logits/chosen": 0.08806152641773224,
"logits/rejected": 0.008822632022202015,
"logps/chosen": -550.2000122070312,
"logps/rejected": -557.2000122070312,
"loss": 0.4278,
"nll_loss": 1.269921898841858,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 1.422265648841858,
"rewards/margins": 2.3148436546325684,
"rewards/rejected": -0.8949218988418579,
"step": 910
},
{
"epoch": 0.6974981046247157,
"grad_norm": 158.19405566519555,
"learning_rate": 1.6849199663016007e-07,
"logits/chosen": 0.11597900092601776,
"logits/rejected": -0.031158447265625,
"logps/chosen": -474.20001220703125,
"logps/rejected": -498.3999938964844,
"loss": 0.3356,
"nll_loss": 1.2820312976837158,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": 1.6453125476837158,
"rewards/margins": 2.567187547683716,
"rewards/rejected": -0.9203125238418579,
"step": 920
},
{
"epoch": 0.7050796057619408,
"grad_norm": 79.99219662946521,
"learning_rate": 1.6427969671440606e-07,
"logits/chosen": 0.1439208984375,
"logits/rejected": -0.03364257887005806,
"logps/chosen": -457.8999938964844,
"logps/rejected": -511.6000061035156,
"loss": 0.405,
"nll_loss": 1.286718726158142,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": 1.412841796875,
"rewards/margins": 2.419140577316284,
"rewards/rejected": -1.007421851158142,
"step": 930
},
{
"epoch": 0.7126611068991661,
"grad_norm": 160.68166830448138,
"learning_rate": 1.6006739679865205e-07,
"logits/chosen": 0.12910155951976776,
"logits/rejected": 0.02253418043255806,
"logps/chosen": -550.5999755859375,
"logps/rejected": -539.4000244140625,
"loss": 0.467,
"nll_loss": 1.023046851158142,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 1.1843750476837158,
"rewards/margins": 1.8195312023162842,
"rewards/rejected": -0.633984386920929,
"step": 940
},
{
"epoch": 0.7202426080363912,
"grad_norm": 48.624487580082295,
"learning_rate": 1.5585509688289806e-07,
"logits/chosen": 0.11916504055261612,
"logits/rejected": 0.012451171875,
"logps/chosen": -501.6000061035156,
"logps/rejected": -440.1000061035156,
"loss": 0.42,
"nll_loss": 1.279296875,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": 1.554101586341858,
"rewards/margins": 2.168750047683716,
"rewards/rejected": -0.6158202886581421,
"step": 950
},
{
"epoch": 0.7278241091736164,
"grad_norm": 108.99945705457651,
"learning_rate": 1.5164279696714405e-07,
"logits/chosen": 0.14907225966453552,
"logits/rejected": 0.005786132998764515,
"logps/chosen": -491.79998779296875,
"logps/rejected": -485.20001220703125,
"loss": 0.4132,
"nll_loss": 1.2664062976837158,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": 1.8162109851837158,
"rewards/margins": 2.458300828933716,
"rewards/rejected": -0.6415039300918579,
"step": 960
},
{
"epoch": 0.7354056103108415,
"grad_norm": 67.28611974505353,
"learning_rate": 1.4743049705139004e-07,
"logits/chosen": 0.10041503608226776,
"logits/rejected": -0.009716796688735485,
"logps/chosen": -593.7999877929688,
"logps/rejected": -552.0,
"loss": 0.4633,
"nll_loss": 1.2531249523162842,
"rewards/accuracies": 0.6875,
"rewards/chosen": 1.6394531726837158,
"rewards/margins": 2.0875000953674316,
"rewards/rejected": -0.44902342557907104,
"step": 970
},
{
"epoch": 0.7429871114480667,
"grad_norm": 192.60992136850282,
"learning_rate": 1.4321819713563605e-07,
"logits/chosen": 0.11516723781824112,
"logits/rejected": 0.006024169735610485,
"logps/chosen": -485.1000061035156,
"logps/rejected": -503.6000061035156,
"loss": 0.4476,
"nll_loss": 1.243749976158142,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 1.648046851158142,
"rewards/margins": 2.1099610328674316,
"rewards/rejected": -0.4625244140625,
"step": 980
},
{
"epoch": 0.7505686125852918,
"grad_norm": 94.26112472676634,
"learning_rate": 1.3900589721988204e-07,
"logits/chosen": 0.06564941257238388,
"logits/rejected": -0.05050048977136612,
"logps/chosen": -572.0,
"logps/rejected": -521.4000244140625,
"loss": 0.3979,
"nll_loss": 1.314062476158142,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 2.239062547683716,
"rewards/margins": 2.651171922683716,
"rewards/rejected": -0.4076171815395355,
"step": 990
},
{
"epoch": 0.7581501137225171,
"grad_norm": 116.49945744118433,
"learning_rate": 1.3479359730412803e-07,
"logits/chosen": 0.20512695610523224,
"logits/rejected": 0.11342773586511612,
"logps/chosen": -461.0,
"logps/rejected": -458.6000061035156,
"loss": 0.3895,
"nll_loss": 1.366796851158142,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": 1.659765601158142,
"rewards/margins": 2.239062547683716,
"rewards/rejected": -0.5755370855331421,
"step": 1000
},
{
"epoch": 0.7657316148597423,
"grad_norm": 210.31730274263796,
"learning_rate": 1.3058129738837404e-07,
"logits/chosen": 0.20961913466453552,
"logits/rejected": 0.07167968899011612,
"logps/chosen": -475.79998779296875,
"logps/rejected": -519.9000244140625,
"loss": 0.3534,
"nll_loss": 1.1046874523162842,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": 1.8623046875,
"rewards/margins": 2.400390625,
"rewards/rejected": -0.5356200933456421,
"step": 1010
},
{
"epoch": 0.7733131159969674,
"grad_norm": 103.41619948561777,
"learning_rate": 1.2636899747262003e-07,
"logits/chosen": 0.12003479152917862,
"logits/rejected": 0.03270263597369194,
"logps/chosen": -683.5999755859375,
"logps/rejected": -555.4000244140625,
"loss": 0.4201,
"nll_loss": 1.4210937023162842,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": 1.8644530773162842,
"rewards/margins": 2.190234422683716,
"rewards/rejected": -0.32763671875,
"step": 1020
},
{
"epoch": 0.7808946171341926,
"grad_norm": 133.79887910214092,
"learning_rate": 1.2215669755686605e-07,
"logits/chosen": 0.09956054389476776,
"logits/rejected": 0.02629699744284153,
"logps/chosen": -606.0,
"logps/rejected": -603.0,
"loss": 0.4273,
"nll_loss": 1.2488281726837158,
"rewards/accuracies": 0.75,
"rewards/chosen": 1.7316405773162842,
"rewards/margins": 2.490234375,
"rewards/rejected": -0.759570300579071,
"step": 1030
},
{
"epoch": 0.7884761182714177,
"grad_norm": 186.46816938496252,
"learning_rate": 1.1794439764111204e-07,
"logits/chosen": 0.12141112983226776,
"logits/rejected": 0.01883544959127903,
"logps/chosen": -555.5999755859375,
"logps/rejected": -588.0,
"loss": 0.3112,
"nll_loss": 1.269921898841858,
"rewards/accuracies": 0.8125,
"rewards/chosen": 1.5886719226837158,
"rewards/margins": 2.426562547683716,
"rewards/rejected": -0.837207019329071,
"step": 1040
},
{
"epoch": 0.796057619408643,
"grad_norm": 203.98306483470228,
"learning_rate": 1.1373209772535804e-07,
"logits/chosen": 0.21123047173023224,
"logits/rejected": 0.06298217922449112,
"logps/chosen": -525.4000244140625,
"logps/rejected": -522.0,
"loss": 0.4055,
"nll_loss": 1.1183593273162842,
"rewards/accuracies": 0.75,
"rewards/chosen": 1.994531273841858,
"rewards/margins": 2.360546827316284,
"rewards/rejected": -0.36601561307907104,
"step": 1050
},
{
"epoch": 0.8036391205458681,
"grad_norm": 187.18328566144393,
"learning_rate": 1.0951979780960404e-07,
"logits/chosen": 0.10944823920726776,
"logits/rejected": 0.02037963829934597,
"logps/chosen": -590.4000244140625,
"logps/rejected": -583.0,
"loss": 0.4506,
"nll_loss": 1.330468773841858,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 1.444921851158142,
"rewards/margins": 2.3101563453674316,
"rewards/rejected": -0.86767578125,
"step": 1060
},
{
"epoch": 0.8112206216830933,
"grad_norm": 98.73681027058517,
"learning_rate": 1.0530749789385003e-07,
"logits/chosen": 0.10260619968175888,
"logits/rejected": -0.04973144456744194,
"logps/chosen": -541.5999755859375,
"logps/rejected": -571.5999755859375,
"loss": 0.3593,
"nll_loss": 1.21484375,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": 1.3909180164337158,
"rewards/margins": 2.383593797683716,
"rewards/rejected": -0.995898425579071,
"step": 1070
},
{
"epoch": 0.8188021228203184,
"grad_norm": 33.39836753544675,
"learning_rate": 1.0109519797809604e-07,
"logits/chosen": 0.18569335341453552,
"logits/rejected": -0.02208251878619194,
"logps/chosen": -469.20001220703125,
"logps/rejected": -457.20001220703125,
"loss": 0.372,
"nll_loss": 1.341796875,
"rewards/accuracies": 0.8125,
"rewards/chosen": 1.908593773841858,
"rewards/margins": 2.8589844703674316,
"rewards/rejected": -0.948046863079071,
"step": 1080
},
{
"epoch": 0.8263836239575436,
"grad_norm": 125.04399201580806,
"learning_rate": 9.688289806234204e-08,
"logits/chosen": 0.15627440810203552,
"logits/rejected": 0.0027099610306322575,
"logps/chosen": -597.0,
"logps/rejected": -624.0,
"loss": 0.341,
"nll_loss": 1.235937476158142,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": 1.831640601158142,
"rewards/margins": 2.4085936546325684,
"rewards/rejected": -0.5736328363418579,
"step": 1090
},
{
"epoch": 0.8339651250947687,
"grad_norm": 131.49101558985592,
"learning_rate": 9.267059814658803e-08,
"logits/chosen": 0.2294921875,
"logits/rejected": 0.06977538764476776,
"logps/chosen": -434.79998779296875,
"logps/rejected": -435.0,
"loss": 0.3921,
"nll_loss": 1.134765625,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": 1.490234375,
"rewards/margins": 2.6937499046325684,
"rewards/rejected": -1.2062499523162842,
"step": 1100
},
{
"epoch": 0.8415466262319939,
"grad_norm": 119.02191047253005,
"learning_rate": 8.845829823083403e-08,
"logits/chosen": 0.293212890625,
"logits/rejected": 0.04813842847943306,
"logps/chosen": -467.20001220703125,
"logps/rejected": -505.6000061035156,
"loss": 0.3092,
"nll_loss": 1.173828125,
"rewards/accuracies": 0.7875000238418579,
"rewards/chosen": 1.607812523841858,
"rewards/margins": 2.729687452316284,
"rewards/rejected": -1.121038794517517,
"step": 1110
},
{
"epoch": 0.849128127369219,
"grad_norm": 149.05016302273359,
"learning_rate": 8.424599831508004e-08,
"logits/chosen": 0.004260254092514515,
"logits/rejected": 0.03310547024011612,
"logps/chosen": -566.2000122070312,
"logps/rejected": -556.4000244140625,
"loss": 0.4365,
"nll_loss": 1.365234375,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 1.508398413658142,
"rewards/margins": 2.2476563453674316,
"rewards/rejected": -0.7374023199081421,
"step": 1120
},
{
"epoch": 0.8567096285064443,
"grad_norm": 55.23109783038184,
"learning_rate": 8.003369839932602e-08,
"logits/chosen": 0.10483398288488388,
"logits/rejected": -0.04458007961511612,
"logps/chosen": -669.7000122070312,
"logps/rejected": -617.2000122070312,
"loss": 0.3465,
"nll_loss": 1.2605469226837158,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": 1.783593773841858,
"rewards/margins": 2.4007811546325684,
"rewards/rejected": -0.6149657964706421,
"step": 1130
},
{
"epoch": 0.8642911296436695,
"grad_norm": 119.87227017716317,
"learning_rate": 7.582139848357203e-08,
"logits/chosen": 0.21893921494483948,
"logits/rejected": 0.0064941407181322575,
"logps/chosen": -496.20001220703125,
"logps/rejected": -506.0,
"loss": 0.4416,
"nll_loss": 1.226953148841858,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 1.5451171398162842,
"rewards/margins": 2.251171827316284,
"rewards/rejected": -0.706738293170929,
"step": 1140
},
{
"epoch": 0.8718726307808946,
"grad_norm": 125.7872093052735,
"learning_rate": 7.160909856781803e-08,
"logits/chosen": 0.14731445908546448,
"logits/rejected": 0.014862060546875,
"logps/chosen": -562.0,
"logps/rejected": -548.4000244140625,
"loss": 0.4338,
"nll_loss": 1.314062476158142,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": 1.6384766101837158,
"rewards/margins": 2.094531297683716,
"rewards/rejected": -0.4574218690395355,
"step": 1150
},
{
"epoch": 0.8794541319181198,
"grad_norm": 155.24491691884234,
"learning_rate": 6.739679865206401e-08,
"logits/chosen": 0.22019043564796448,
"logits/rejected": 0.11717529594898224,
"logps/chosen": -518.7000122070312,
"logps/rejected": -484.3999938964844,
"loss": 0.3854,
"nll_loss": 1.26171875,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": 1.605078101158142,
"rewards/margins": 2.059375047683716,
"rewards/rejected": -0.45521241426467896,
"step": 1160
},
{
"epoch": 0.8870356330553449,
"grad_norm": 72.26607344826763,
"learning_rate": 6.318449873631002e-08,
"logits/chosen": 0.16090087592601776,
"logits/rejected": -0.0035644532181322575,
"logps/chosen": -717.5,
"logps/rejected": -732.7999877929688,
"loss": 0.4008,
"nll_loss": 1.166406273841858,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": 2.0093750953674316,
"rewards/margins": 2.692187547683716,
"rewards/rejected": -0.6851562261581421,
"step": 1170
},
{
"epoch": 0.8946171341925702,
"grad_norm": 108.94764921361714,
"learning_rate": 5.897219882055602e-08,
"logits/chosen": 0.16987304389476776,
"logits/rejected": -0.04276733472943306,
"logps/chosen": -556.5999755859375,
"logps/rejected": -527.5999755859375,
"loss": 0.3503,
"nll_loss": 1.2492187023162842,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": 1.68017578125,
"rewards/margins": 2.442578077316284,
"rewards/rejected": -0.7623046636581421,
"step": 1180
},
{
"epoch": 0.9021986353297953,
"grad_norm": 131.0459749369346,
"learning_rate": 5.475989890480202e-08,
"logits/chosen": 0.21416015923023224,
"logits/rejected": -0.0015411376953125,
"logps/chosen": -501.3999938964844,
"logps/rejected": -516.2000122070312,
"loss": 0.3754,
"nll_loss": 1.2999999523162842,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": 1.9015624523162842,
"rewards/margins": 2.109179735183716,
"rewards/rejected": -0.20556640625,
"step": 1190
},
{
"epoch": 0.9097801364670205,
"grad_norm": 307.9800162305656,
"learning_rate": 5.054759898904802e-08,
"logits/chosen": 0.10655517876148224,
"logits/rejected": 0.009765625,
"logps/chosen": -486.6000061035156,
"logps/rejected": -504.0,
"loss": 0.465,
"nll_loss": 1.3386719226837158,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": 1.873437523841858,
"rewards/margins": 2.637500047683716,
"rewards/rejected": -0.762402355670929,
"step": 1200
},
{
"epoch": 0.9173616376042456,
"grad_norm": 133.27100734071354,
"learning_rate": 4.6335299073294016e-08,
"logits/chosen": 0.20000000298023224,
"logits/rejected": 0.10622558742761612,
"logps/chosen": -451.29998779296875,
"logps/rejected": -492.6000061035156,
"loss": 0.4231,
"nll_loss": 1.357421875,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": 1.500390648841858,
"rewards/margins": 2.056640625,
"rewards/rejected": -0.5611327886581421,
"step": 1210
},
{
"epoch": 0.9249431387414708,
"grad_norm": 85.24520542811791,
"learning_rate": 4.212299915754002e-08,
"logits/chosen": 0.21176758408546448,
"logits/rejected": 0.04278564453125,
"logps/chosen": -466.3999938964844,
"logps/rejected": -525.0,
"loss": 0.3879,
"nll_loss": 1.3093750476837158,
"rewards/accuracies": 0.75,
"rewards/chosen": 1.709375023841858,
"rewards/margins": 2.3140625953674316,
"rewards/rejected": -0.607861340045929,
"step": 1220
},
{
"epoch": 0.9325246398786959,
"grad_norm": 112.79958598714522,
"learning_rate": 3.791069924178601e-08,
"logits/chosen": 0.12949219346046448,
"logits/rejected": 0.00974121131002903,
"logps/chosen": -507.0,
"logps/rejected": -529.2000122070312,
"loss": 0.399,
"nll_loss": 1.314453125,
"rewards/accuracies": 0.75,
"rewards/chosen": 1.6931641101837158,
"rewards/margins": 2.850781202316284,
"rewards/rejected": -1.158203125,
"step": 1230
},
{
"epoch": 0.9401061410159212,
"grad_norm": 176.59295105465074,
"learning_rate": 3.369839932603201e-08,
"logits/chosen": 0.11582031100988388,
"logits/rejected": -0.012280273251235485,
"logps/chosen": -528.2000122070312,
"logps/rejected": -531.7999877929688,
"loss": 0.3927,
"nll_loss": 1.235937476158142,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": 1.801855444908142,
"rewards/margins": 2.326171875,
"rewards/rejected": -0.5223633050918579,
"step": 1240
},
{
"epoch": 0.9476876421531463,
"grad_norm": 146.1635844887063,
"learning_rate": 2.948609941027801e-08,
"logits/chosen": 0.23291015625,
"logits/rejected": 0.02499694749712944,
"logps/chosen": -417.79998779296875,
"logps/rejected": -488.20001220703125,
"loss": 0.3236,
"nll_loss": 1.2312500476837158,
"rewards/accuracies": 0.8125,
"rewards/chosen": 1.4695312976837158,
"rewards/margins": 2.3531250953674316,
"rewards/rejected": -0.8837890625,
"step": 1250
},
{
"epoch": 0.9552691432903715,
"grad_norm": 108.13435305388558,
"learning_rate": 2.527379949452401e-08,
"logits/chosen": 0.13677978515625,
"logits/rejected": 0.04533691331744194,
"logps/chosen": -525.0,
"logps/rejected": -642.0,
"loss": 0.3805,
"nll_loss": 1.3800780773162842,
"rewards/accuracies": 0.7875000238418579,
"rewards/chosen": 1.7277343273162842,
"rewards/margins": 2.5589842796325684,
"rewards/rejected": -0.8324218988418579,
"step": 1260
},
{
"epoch": 0.9628506444275967,
"grad_norm": 268.6347905626109,
"learning_rate": 2.106149957877001e-08,
"logits/chosen": 0.18927001953125,
"logits/rejected": 0.06524658203125,
"logps/chosen": -494.20001220703125,
"logps/rejected": -469.0,
"loss": 0.3914,
"nll_loss": 1.1515624523162842,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": 1.841406226158142,
"rewards/margins": 2.49609375,
"rewards/rejected": -0.6563476324081421,
"step": 1270
},
{
"epoch": 0.9704321455648218,
"grad_norm": 88.73642966202097,
"learning_rate": 1.6849199663016004e-08,
"logits/chosen": 0.2381591796875,
"logits/rejected": 0.07663574069738388,
"logps/chosen": -524.2000122070312,
"logps/rejected": -565.7999877929688,
"loss": 0.439,
"nll_loss": 1.342187523841858,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": 1.600976586341858,
"rewards/margins": 2.6089844703674316,
"rewards/rejected": -1.008203148841858,
"step": 1280
},
{
"epoch": 0.978013646702047,
"grad_norm": 110.05713639865995,
"learning_rate": 1.2636899747262005e-08,
"logits/chosen": 0.16679687798023224,
"logits/rejected": 0.04754638671875,
"logps/chosen": -456.3999938964844,
"logps/rejected": -462.6000061035156,
"loss": 0.4392,
"nll_loss": 1.216406226158142,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 1.694726586341858,
"rewards/margins": 2.328125,
"rewards/rejected": -0.631542980670929,
"step": 1290
},
{
"epoch": 0.9855951478392722,
"grad_norm": 166.5083302014064,
"learning_rate": 8.424599831508002e-09,
"logits/chosen": 0.1453857421875,
"logits/rejected": 0.04035644605755806,
"logps/chosen": -548.2000122070312,
"logps/rejected": -470.6000061035156,
"loss": 0.363,
"nll_loss": 1.1730468273162842,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 1.3935546875,
"rewards/margins": 2.388671875,
"rewards/rejected": -0.994140625,
"step": 1300
},
{
"epoch": 0.9931766489764974,
"grad_norm": 118.78830054230653,
"learning_rate": 4.212299915754001e-09,
"logits/chosen": 0.14559325575828552,
"logits/rejected": 0.04741210862994194,
"logps/chosen": -647.0,
"logps/rejected": -675.4000244140625,
"loss": 0.4532,
"nll_loss": 1.234375,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 1.4243896007537842,
"rewards/margins": 2.1460938453674316,
"rewards/rejected": -0.7232910394668579,
"step": 1310
},
{
"epoch": 1.0,
"eval_logits/chosen": 0.2664245665073395,
"eval_logits/rejected": 0.09165038913488388,
"eval_logps/chosen": -413.79998779296875,
"eval_logps/rejected": -411.79998779296875,
"eval_loss": 0.3644465506076813,
"eval_nll_loss": 1.1941406726837158,
"eval_rewards/accuracies": 0.762499988079071,
"eval_rewards/chosen": 1.34765625,
"eval_rewards/margins": 2.319140672683716,
"eval_rewards/rejected": -0.971630871295929,
"eval_runtime": 6.1397,
"eval_samples_per_second": 12.378,
"eval_steps_per_second": 1.629,
"step": 1319
},
{
"epoch": 1.0,
"step": 1319,
"total_flos": 0.0,
"train_loss": 0.45258238606601164,
"train_runtime": 2337.5511,
"train_samples_per_second": 4.512,
"train_steps_per_second": 0.564
}
],
"logging_steps": 10,
"max_steps": 1319,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}