justthzz's picture
Upload 14 files
ba7c245 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.0,
"eval_steps": 500,
"global_step": 2154,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.013927576601671309,
"grad_norm": 4.152076244354248,
"learning_rate": 4.988393686165274e-05,
"logits/chosen": -62.701507568359375,
"logits/rejected": -68.97389221191406,
"logps/chosen": -198.8888702392578,
"logps/rejected": -61.155731201171875,
"loss": 0.4537,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": 0.9171991348266602,
"rewards/margins": 0.8940274119377136,
"rewards/rejected": 0.02317170798778534,
"step": 10
},
{
"epoch": 0.027855153203342618,
"grad_norm": 1.4107317924499512,
"learning_rate": 4.965181058495822e-05,
"logits/chosen": -62.833290100097656,
"logits/rejected": -69.8056411743164,
"logps/chosen": -184.6682586669922,
"logps/rejected": -70.85790252685547,
"loss": 0.0524,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.683924913406372,
"rewards/margins": 3.4110190868377686,
"rewards/rejected": -0.7270944714546204,
"step": 20
},
{
"epoch": 0.04178272980501393,
"grad_norm": 0.09278041124343872,
"learning_rate": 4.9419684308263696e-05,
"logits/chosen": -61.66850662231445,
"logits/rejected": -68.24287414550781,
"logps/chosen": -187.5883331298828,
"logps/rejected": -84.70890808105469,
"loss": 0.0225,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.7358405590057373,
"rewards/margins": 4.847321510314941,
"rewards/rejected": -2.111480712890625,
"step": 30
},
{
"epoch": 0.055710306406685235,
"grad_norm": 0.0695909857749939,
"learning_rate": 4.918755803156918e-05,
"logits/chosen": -60.128639221191406,
"logits/rejected": -69.80757141113281,
"logps/chosen": -201.54000854492188,
"logps/rejected": -101.98795318603516,
"loss": 0.0066,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.8020272254943848,
"rewards/margins": 6.231036186218262,
"rewards/rejected": -3.4290084838867188,
"step": 40
},
{
"epoch": 0.06963788300835655,
"grad_norm": 0.05616675317287445,
"learning_rate": 4.895543175487465e-05,
"logits/chosen": -62.200286865234375,
"logits/rejected": -70.98320007324219,
"logps/chosen": -196.42550659179688,
"logps/rejected": -113.12223815917969,
"loss": 0.0064,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.6243250370025635,
"rewards/margins": 7.210093021392822,
"rewards/rejected": -4.585768699645996,
"step": 50
},
{
"epoch": 0.08356545961002786,
"grad_norm": 0.11463262885808945,
"learning_rate": 4.872330547818013e-05,
"logits/chosen": -60.18315505981445,
"logits/rejected": -67.06063842773438,
"logps/chosen": -181.308837890625,
"logps/rejected": -111.84517669677734,
"loss": 0.0029,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.787447929382324,
"rewards/margins": 7.701785087585449,
"rewards/rejected": -4.914338111877441,
"step": 60
},
{
"epoch": 0.09749303621169916,
"grad_norm": 0.547484815120697,
"learning_rate": 4.849117920148561e-05,
"logits/chosen": -62.386741638183594,
"logits/rejected": -70.57563781738281,
"logps/chosen": -181.2909698486328,
"logps/rejected": -112.4539566040039,
"loss": 0.0112,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.940854787826538,
"rewards/margins": 7.5811638832092285,
"rewards/rejected": -4.6403093338012695,
"step": 70
},
{
"epoch": 0.11142061281337047,
"grad_norm": 0.0395994707942009,
"learning_rate": 4.825905292479109e-05,
"logits/chosen": -59.95817947387695,
"logits/rejected": -66.5330581665039,
"logps/chosen": -188.38150024414062,
"logps/rejected": -113.49845123291016,
"loss": 0.0032,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.9799869060516357,
"rewards/margins": 8.083974838256836,
"rewards/rejected": -5.103987693786621,
"step": 80
},
{
"epoch": 0.12534818941504178,
"grad_norm": 0.015164317563176155,
"learning_rate": 4.8026926648096564e-05,
"logits/chosen": -59.706085205078125,
"logits/rejected": -65.60169219970703,
"logps/chosen": -183.0960235595703,
"logps/rejected": -116.09562683105469,
"loss": 0.0022,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.0736746788024902,
"rewards/margins": 8.412053108215332,
"rewards/rejected": -5.338377952575684,
"step": 90
},
{
"epoch": 0.1392757660167131,
"grad_norm": 0.02766968123614788,
"learning_rate": 4.7794800371402045e-05,
"logits/chosen": -58.617881774902344,
"logits/rejected": -67.40831756591797,
"logps/chosen": -186.41848754882812,
"logps/rejected": -124.04377746582031,
"loss": 0.001,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.1033053398132324,
"rewards/margins": 9.324697494506836,
"rewards/rejected": -6.2213921546936035,
"step": 100
},
{
"epoch": 0.1532033426183844,
"grad_norm": 0.00412454828619957,
"learning_rate": 4.756267409470752e-05,
"logits/chosen": -60.01245880126953,
"logits/rejected": -75.11415100097656,
"logps/chosen": -185.37948608398438,
"logps/rejected": -135.00729370117188,
"loss": 0.0011,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.50886869430542,
"rewards/margins": 9.430502891540527,
"rewards/rejected": -6.921634674072266,
"step": 110
},
{
"epoch": 0.1671309192200557,
"grad_norm": 0.0065751285292208195,
"learning_rate": 4.7330547818013e-05,
"logits/chosen": -62.41272735595703,
"logits/rejected": -78.67548370361328,
"logps/chosen": -192.0312957763672,
"logps/rejected": -128.97535705566406,
"loss": 0.0237,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.159588575363159,
"rewards/margins": 9.233824729919434,
"rewards/rejected": -7.0742363929748535,
"step": 120
},
{
"epoch": 0.181058495821727,
"grad_norm": 0.021884003654122353,
"learning_rate": 4.7098421541318476e-05,
"logits/chosen": -58.38056564331055,
"logits/rejected": -73.96531677246094,
"logps/chosen": -190.47996520996094,
"logps/rejected": -132.82919311523438,
"loss": 0.0012,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.546886682510376,
"rewards/margins": 9.201826095581055,
"rewards/rejected": -6.654940128326416,
"step": 130
},
{
"epoch": 0.19498607242339833,
"grad_norm": 0.019679848104715347,
"learning_rate": 4.686629526462396e-05,
"logits/chosen": -59.04082489013672,
"logits/rejected": -72.82725524902344,
"logps/chosen": -201.3197021484375,
"logps/rejected": -137.27078247070312,
"loss": 0.0012,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.7231216430664062,
"rewards/margins": 9.771955490112305,
"rewards/rejected": -7.048834323883057,
"step": 140
},
{
"epoch": 0.20891364902506965,
"grad_norm": 0.024676833301782608,
"learning_rate": 4.663416898792944e-05,
"logits/chosen": -57.91033172607422,
"logits/rejected": -75.91812896728516,
"logps/chosen": -193.44387817382812,
"logps/rejected": -137.32614135742188,
"loss": 0.0005,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.3082289695739746,
"rewards/margins": 9.814374923706055,
"rewards/rejected": -7.506146430969238,
"step": 150
},
{
"epoch": 0.22284122562674094,
"grad_norm": 0.35007965564727783,
"learning_rate": 4.640204271123492e-05,
"logits/chosen": -59.402008056640625,
"logits/rejected": -80.88484191894531,
"logps/chosen": -194.41064453125,
"logps/rejected": -142.43862915039062,
"loss": 0.0007,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.072852373123169,
"rewards/margins": 9.972261428833008,
"rewards/rejected": -7.899407863616943,
"step": 160
},
{
"epoch": 0.23676880222841226,
"grad_norm": 0.16541853547096252,
"learning_rate": 4.6169916434540394e-05,
"logits/chosen": -57.10036087036133,
"logits/rejected": -77.26717376708984,
"logps/chosen": -188.2787322998047,
"logps/rejected": -140.36428833007812,
"loss": 0.0011,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.387071132659912,
"rewards/margins": 10.67628002166748,
"rewards/rejected": -8.289209365844727,
"step": 170
},
{
"epoch": 0.25069637883008355,
"grad_norm": 0.03563378006219864,
"learning_rate": 4.5937790157845876e-05,
"logits/chosen": -56.153770446777344,
"logits/rejected": -69.28707885742188,
"logps/chosen": -180.4019775390625,
"logps/rejected": -142.40525817871094,
"loss": 0.002,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.8976480960845947,
"rewards/margins": 10.338732719421387,
"rewards/rejected": -7.441084861755371,
"step": 180
},
{
"epoch": 0.2646239554317549,
"grad_norm": 0.06060578674077988,
"learning_rate": 4.570566388115135e-05,
"logits/chosen": -54.91765213012695,
"logits/rejected": -62.35480499267578,
"logps/chosen": -181.2240753173828,
"logps/rejected": -123.99375915527344,
"loss": 0.003,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.1657493114471436,
"rewards/margins": 9.318578720092773,
"rewards/rejected": -6.152829170227051,
"step": 190
},
{
"epoch": 0.2785515320334262,
"grad_norm": 0.0840856060385704,
"learning_rate": 4.547353760445683e-05,
"logits/chosen": -59.377830505371094,
"logits/rejected": -71.08708190917969,
"logps/chosen": -185.47975158691406,
"logps/rejected": -134.31422424316406,
"loss": 0.0017,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.0341360569000244,
"rewards/margins": 10.54447078704834,
"rewards/rejected": -7.5103349685668945,
"step": 200
},
{
"epoch": 0.2924791086350975,
"grad_norm": 0.06361512094736099,
"learning_rate": 4.5241411327762306e-05,
"logits/chosen": -63.87712478637695,
"logits/rejected": -73.06085205078125,
"logps/chosen": -213.22708129882812,
"logps/rejected": -147.33645629882812,
"loss": 0.0009,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.229581356048584,
"rewards/margins": 9.58311939239502,
"rewards/rejected": -7.353537559509277,
"step": 210
},
{
"epoch": 0.3064066852367688,
"grad_norm": 0.046209145337343216,
"learning_rate": 4.500928505106779e-05,
"logits/chosen": -59.61906051635742,
"logits/rejected": -73.25764465332031,
"logps/chosen": -192.64942932128906,
"logps/rejected": -150.16732788085938,
"loss": 0.0006,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.1536495685577393,
"rewards/margins": 10.42214298248291,
"rewards/rejected": -8.268494606018066,
"step": 220
},
{
"epoch": 0.3203342618384401,
"grad_norm": 0.006761509459465742,
"learning_rate": 4.477715877437326e-05,
"logits/chosen": -58.5309944152832,
"logits/rejected": -68.51580810546875,
"logps/chosen": -193.02699279785156,
"logps/rejected": -150.4070587158203,
"loss": 0.0026,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.504077434539795,
"rewards/margins": 10.579719543457031,
"rewards/rejected": -8.075642585754395,
"step": 230
},
{
"epoch": 0.3342618384401114,
"grad_norm": 0.007206082809716463,
"learning_rate": 4.4545032497678744e-05,
"logits/chosen": -56.670623779296875,
"logits/rejected": -67.32600402832031,
"logps/chosen": -164.60655212402344,
"logps/rejected": -152.1390838623047,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.570728302001953,
"rewards/margins": 11.153335571289062,
"rewards/rejected": -8.582606315612793,
"step": 240
},
{
"epoch": 0.34818941504178275,
"grad_norm": 0.0010453248396515846,
"learning_rate": 4.431290622098422e-05,
"logits/chosen": -57.710182189941406,
"logits/rejected": -67.71039581298828,
"logps/chosen": -192.8719940185547,
"logps/rejected": -145.4492950439453,
"loss": 0.0004,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.7189605236053467,
"rewards/margins": 11.462281227111816,
"rewards/rejected": -8.743319511413574,
"step": 250
},
{
"epoch": 0.362116991643454,
"grad_norm": 0.0006360734696500003,
"learning_rate": 4.40807799442897e-05,
"logits/chosen": -57.6457633972168,
"logits/rejected": -73.46452331542969,
"logps/chosen": -192.46934509277344,
"logps/rejected": -158.818115234375,
"loss": 0.0003,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.0252301692962646,
"rewards/margins": 11.26839542388916,
"rewards/rejected": -9.2431640625,
"step": 260
},
{
"epoch": 0.37604456824512533,
"grad_norm": 0.007503910455852747,
"learning_rate": 4.3848653667595174e-05,
"logits/chosen": -55.48795700073242,
"logits/rejected": -67.17798614501953,
"logps/chosen": -167.91905212402344,
"logps/rejected": -146.4444122314453,
"loss": 0.0206,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": 3.000150203704834,
"rewards/margins": 11.605803489685059,
"rewards/rejected": -8.605653762817383,
"step": 270
},
{
"epoch": 0.38997214484679665,
"grad_norm": 0.04008936882019043,
"learning_rate": 4.3616527390900656e-05,
"logits/chosen": -56.12969207763672,
"logits/rejected": -61.01743698120117,
"logps/chosen": -187.25384521484375,
"logps/rejected": -127.84864807128906,
"loss": 0.0007,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.0087008476257324,
"rewards/margins": 9.433700561523438,
"rewards/rejected": -6.424999237060547,
"step": 280
},
{
"epoch": 0.403899721448468,
"grad_norm": 0.013929404318332672,
"learning_rate": 4.338440111420613e-05,
"logits/chosen": -55.58070755004883,
"logits/rejected": -61.17802810668945,
"logps/chosen": -188.1905517578125,
"logps/rejected": -136.31631469726562,
"loss": 0.0008,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.6118099689483643,
"rewards/margins": 9.186898231506348,
"rewards/rejected": -6.575087547302246,
"step": 290
},
{
"epoch": 0.4178272980501393,
"grad_norm": 0.010859617963433266,
"learning_rate": 4.3152274837511605e-05,
"logits/chosen": -57.99853515625,
"logits/rejected": -67.6804428100586,
"logps/chosen": -188.915283203125,
"logps/rejected": -148.61837768554688,
"loss": 0.0007,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.471911907196045,
"rewards/margins": 11.200289726257324,
"rewards/rejected": -8.728376388549805,
"step": 300
},
{
"epoch": 0.43175487465181056,
"grad_norm": 0.006545162294059992,
"learning_rate": 4.2920148560817086e-05,
"logits/chosen": -58.74571990966797,
"logits/rejected": -69.44742584228516,
"logps/chosen": -204.63160705566406,
"logps/rejected": -162.09996032714844,
"loss": 0.0003,
"rewards/accuracies": 1.0,
"rewards/chosen": 1.9170233011245728,
"rewards/margins": 11.851190567016602,
"rewards/rejected": -9.934167861938477,
"step": 310
},
{
"epoch": 0.4456824512534819,
"grad_norm": 0.0008512301137670875,
"learning_rate": 4.268802228412256e-05,
"logits/chosen": -58.301719665527344,
"logits/rejected": -70.69847106933594,
"logps/chosen": -183.65023803710938,
"logps/rejected": -150.41163635253906,
"loss": 0.0006,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.81954288482666,
"rewards/margins": 11.889869689941406,
"rewards/rejected": -9.07032585144043,
"step": 320
},
{
"epoch": 0.4596100278551532,
"grad_norm": 0.01963173598051071,
"learning_rate": 4.245589600742804e-05,
"logits/chosen": -56.879295349121094,
"logits/rejected": -61.67259979248047,
"logps/chosen": -171.37228393554688,
"logps/rejected": -126.36521911621094,
"loss": 0.0011,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.5972225666046143,
"rewards/margins": 9.433979988098145,
"rewards/rejected": -5.836757659912109,
"step": 330
},
{
"epoch": 0.4735376044568245,
"grad_norm": 0.08404721319675446,
"learning_rate": 4.222376973073352e-05,
"logits/chosen": -58.5677375793457,
"logits/rejected": -64.17628479003906,
"logps/chosen": -175.2589569091797,
"logps/rejected": -128.47628784179688,
"loss": 0.0004,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.6719391345977783,
"rewards/margins": 9.350198745727539,
"rewards/rejected": -5.6782612800598145,
"step": 340
},
{
"epoch": 0.48746518105849584,
"grad_norm": 0.00980888307094574,
"learning_rate": 4.1991643454039e-05,
"logits/chosen": -59.190216064453125,
"logits/rejected": -64.348388671875,
"logps/chosen": -185.35513305664062,
"logps/rejected": -121.76618957519531,
"loss": 0.0006,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.289034366607666,
"rewards/margins": 10.052196502685547,
"rewards/rejected": -6.7631635665893555,
"step": 350
},
{
"epoch": 0.5013927576601671,
"grad_norm": 0.0020706213545054197,
"learning_rate": 4.175951717734447e-05,
"logits/chosen": -58.2427864074707,
"logits/rejected": -60.856529235839844,
"logps/chosen": -194.89059448242188,
"logps/rejected": -143.33876037597656,
"loss": 0.0005,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.073824405670166,
"rewards/margins": 10.457071304321289,
"rewards/rejected": -7.383247375488281,
"step": 360
},
{
"epoch": 0.5153203342618384,
"grad_norm": 0.058567263185977936,
"learning_rate": 4.1527390900649954e-05,
"logits/chosen": -57.275421142578125,
"logits/rejected": -59.9005012512207,
"logps/chosen": -190.70034790039062,
"logps/rejected": -146.0443878173828,
"loss": 0.0002,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.392638683319092,
"rewards/margins": 11.20268440246582,
"rewards/rejected": -7.810046195983887,
"step": 370
},
{
"epoch": 0.5292479108635098,
"grad_norm": 0.04418317973613739,
"learning_rate": 4.129526462395543e-05,
"logits/chosen": -55.69623565673828,
"logits/rejected": -59.0958366394043,
"logps/chosen": -190.6586151123047,
"logps/rejected": -140.555419921875,
"loss": 0.0003,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.8878097534179688,
"rewards/margins": 10.44308090209961,
"rewards/rejected": -7.555271148681641,
"step": 380
},
{
"epoch": 0.5431754874651811,
"grad_norm": 0.03342704474925995,
"learning_rate": 4.106313834726091e-05,
"logits/chosen": -56.76637649536133,
"logits/rejected": -57.29325485229492,
"logps/chosen": -188.15127563476562,
"logps/rejected": -150.76486206054688,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.7517261505126953,
"rewards/margins": 11.091150283813477,
"rewards/rejected": -8.339423179626465,
"step": 390
},
{
"epoch": 0.5571030640668524,
"grad_norm": 0.0036266562528908253,
"learning_rate": 4.0831012070566385e-05,
"logits/chosen": -56.438453674316406,
"logits/rejected": -58.3618049621582,
"logps/chosen": -189.70164489746094,
"logps/rejected": -153.29364013671875,
"loss": 0.0002,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.944225788116455,
"rewards/margins": 11.944395065307617,
"rewards/rejected": -9.000168800354004,
"step": 400
},
{
"epoch": 0.5710306406685237,
"grad_norm": 0.2346808910369873,
"learning_rate": 4.0598885793871866e-05,
"logits/chosen": -55.23307418823242,
"logits/rejected": -58.60841751098633,
"logps/chosen": -192.96762084960938,
"logps/rejected": -159.66629028320312,
"loss": 0.0003,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.9198691844940186,
"rewards/margins": 12.067418098449707,
"rewards/rejected": -9.14754867553711,
"step": 410
},
{
"epoch": 0.584958217270195,
"grad_norm": 0.011593617498874664,
"learning_rate": 4.036675951717734e-05,
"logits/chosen": -54.3653564453125,
"logits/rejected": -54.57688522338867,
"logps/chosen": -199.13137817382812,
"logps/rejected": -155.0882568359375,
"loss": 0.0002,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.6887240409851074,
"rewards/margins": 11.552785873413086,
"rewards/rejected": -8.86406135559082,
"step": 420
},
{
"epoch": 0.5988857938718662,
"grad_norm": 0.0311275701969862,
"learning_rate": 4.013463324048282e-05,
"logits/chosen": -54.45751190185547,
"logits/rejected": -55.70067596435547,
"logps/chosen": -180.8164825439453,
"logps/rejected": -157.07566833496094,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.0822670459747314,
"rewards/margins": 12.752963066101074,
"rewards/rejected": -9.670696258544922,
"step": 430
},
{
"epoch": 0.6128133704735376,
"grad_norm": 0.0015138484304770827,
"learning_rate": 3.9902506963788303e-05,
"logits/chosen": -53.217491149902344,
"logits/rejected": -55.91618728637695,
"logps/chosen": -197.72886657714844,
"logps/rejected": -155.79360961914062,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.0949454307556152,
"rewards/margins": 12.46452808380127,
"rewards/rejected": -9.36958122253418,
"step": 440
},
{
"epoch": 0.6267409470752089,
"grad_norm": 0.0017882110550999641,
"learning_rate": 3.9670380687093785e-05,
"logits/chosen": -53.5261116027832,
"logits/rejected": -53.344627380371094,
"logps/chosen": -178.9260711669922,
"logps/rejected": -149.0911407470703,
"loss": 0.0003,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.9560890197753906,
"rewards/margins": 12.503755569458008,
"rewards/rejected": -9.5476655960083,
"step": 450
},
{
"epoch": 0.6406685236768802,
"grad_norm": 0.0048352451995015144,
"learning_rate": 3.943825441039926e-05,
"logits/chosen": -51.4307746887207,
"logits/rejected": -46.89828872680664,
"logps/chosen": -165.9393768310547,
"logps/rejected": -152.39059448242188,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.4784343242645264,
"rewards/margins": 13.470016479492188,
"rewards/rejected": -9.991582870483398,
"step": 460
},
{
"epoch": 0.6545961002785515,
"grad_norm": 0.00039086639299057424,
"learning_rate": 3.920612813370474e-05,
"logits/chosen": -52.365867614746094,
"logits/rejected": -54.72825241088867,
"logps/chosen": -174.74122619628906,
"logps/rejected": -155.45237731933594,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.434833526611328,
"rewards/margins": 12.323007583618164,
"rewards/rejected": -8.888172149658203,
"step": 470
},
{
"epoch": 0.6685236768802229,
"grad_norm": 0.003697748063132167,
"learning_rate": 3.8974001857010215e-05,
"logits/chosen": -52.16254425048828,
"logits/rejected": -54.24531936645508,
"logps/chosen": -181.53611755371094,
"logps/rejected": -157.99269104003906,
"loss": 0.0009,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.4443488121032715,
"rewards/margins": 12.453468322753906,
"rewards/rejected": -9.009119033813477,
"step": 480
},
{
"epoch": 0.6824512534818942,
"grad_norm": 0.002408905653283,
"learning_rate": 3.87418755803157e-05,
"logits/chosen": -51.06204605102539,
"logits/rejected": -50.75390625,
"logps/chosen": -188.5814666748047,
"logps/rejected": -163.3162078857422,
"loss": 0.0008,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.4482767581939697,
"rewards/margins": 11.760849952697754,
"rewards/rejected": -9.312572479248047,
"step": 490
},
{
"epoch": 0.6963788300835655,
"grad_norm": 0.00516551174223423,
"learning_rate": 3.850974930362117e-05,
"logits/chosen": -50.288543701171875,
"logits/rejected": -50.007869720458984,
"logps/chosen": -195.97866821289062,
"logps/rejected": -161.41064453125,
"loss": 0.0076,
"rewards/accuracies": 1.0,
"rewards/chosen": 1.8668901920318604,
"rewards/margins": 11.639043807983398,
"rewards/rejected": -9.772153854370117,
"step": 500
},
{
"epoch": 0.7103064066852368,
"grad_norm": 0.05361781641840935,
"learning_rate": 3.827762302692665e-05,
"logits/chosen": -51.25343704223633,
"logits/rejected": -51.83671951293945,
"logps/chosen": -194.06118774414062,
"logps/rejected": -160.32118225097656,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.963975429534912,
"rewards/margins": 11.554098129272461,
"rewards/rejected": -8.590123176574707,
"step": 510
},
{
"epoch": 0.724233983286908,
"grad_norm": 0.058117084205150604,
"learning_rate": 3.804549675023213e-05,
"logits/chosen": -51.83235549926758,
"logits/rejected": -52.084144592285156,
"logps/chosen": -192.75059509277344,
"logps/rejected": -153.1986846923828,
"loss": 0.0004,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.5486092567443848,
"rewards/margins": 11.368444442749023,
"rewards/rejected": -8.81983470916748,
"step": 520
},
{
"epoch": 0.7381615598885793,
"grad_norm": 0.0007053585723042488,
"learning_rate": 3.781337047353761e-05,
"logits/chosen": -51.002742767333984,
"logits/rejected": -50.87931823730469,
"logps/chosen": -194.43157958984375,
"logps/rejected": -167.8184356689453,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.1437911987304688,
"rewards/margins": 12.299717903137207,
"rewards/rejected": -10.155926704406738,
"step": 530
},
{
"epoch": 0.7520891364902507,
"grad_norm": 0.0001465307577745989,
"learning_rate": 3.758124419684308e-05,
"logits/chosen": -52.29041290283203,
"logits/rejected": -55.17450714111328,
"logps/chosen": -182.52536010742188,
"logps/rejected": -171.62203979492188,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.4032492637634277,
"rewards/margins": 13.419294357299805,
"rewards/rejected": -11.016047477722168,
"step": 540
},
{
"epoch": 0.766016713091922,
"grad_norm": 0.004270358011126518,
"learning_rate": 3.7349117920148565e-05,
"logits/chosen": -51.10778045654297,
"logits/rejected": -51.92633819580078,
"logps/chosen": -183.72921752929688,
"logps/rejected": -163.09469604492188,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.8001818656921387,
"rewards/margins": 13.584017753601074,
"rewards/rejected": -10.78383731842041,
"step": 550
},
{
"epoch": 0.7799442896935933,
"grad_norm": 0.0015566727379336953,
"learning_rate": 3.711699164345404e-05,
"logits/chosen": -51.11431884765625,
"logits/rejected": -55.25889205932617,
"logps/chosen": -181.35389709472656,
"logps/rejected": -168.32589721679688,
"loss": 0.0002,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.060955762863159,
"rewards/margins": 11.915121078491211,
"rewards/rejected": -9.854166030883789,
"step": 560
},
{
"epoch": 0.7938718662952646,
"grad_norm": 0.0005651906249113381,
"learning_rate": 3.688486536675952e-05,
"logits/chosen": -52.69512176513672,
"logits/rejected": -53.26996994018555,
"logps/chosen": -211.1423797607422,
"logps/rejected": -173.87876892089844,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.2983193397521973,
"rewards/margins": 13.304783821105957,
"rewards/rejected": -11.006464004516602,
"step": 570
},
{
"epoch": 0.807799442896936,
"grad_norm": 0.0008942225249484181,
"learning_rate": 3.6652739090064995e-05,
"logits/chosen": -51.08681869506836,
"logits/rejected": -53.34214401245117,
"logps/chosen": -198.1741943359375,
"logps/rejected": -172.56463623046875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.6808273792266846,
"rewards/margins": 13.719111442565918,
"rewards/rejected": -11.03828239440918,
"step": 580
},
{
"epoch": 0.8217270194986073,
"grad_norm": 0.0006534375716000795,
"learning_rate": 3.642061281337048e-05,
"logits/chosen": -50.69335174560547,
"logits/rejected": -53.41472625732422,
"logps/chosen": -187.2412109375,
"logps/rejected": -174.28054809570312,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.670588254928589,
"rewards/margins": 13.738470077514648,
"rewards/rejected": -11.067883491516113,
"step": 590
},
{
"epoch": 0.8356545961002786,
"grad_norm": 0.0013752166414633393,
"learning_rate": 3.618848653667595e-05,
"logits/chosen": -52.0291748046875,
"logits/rejected": -54.64427947998047,
"logps/chosen": -201.79702758789062,
"logps/rejected": -173.984130859375,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.147660732269287,
"rewards/margins": 13.210504531860352,
"rewards/rejected": -11.062845230102539,
"step": 600
},
{
"epoch": 0.8495821727019499,
"grad_norm": 0.006681976839900017,
"learning_rate": 3.595636025998143e-05,
"logits/chosen": -49.95285415649414,
"logits/rejected": -50.84339141845703,
"logps/chosen": -196.45458984375,
"logps/rejected": -179.96713256835938,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.1962101459503174,
"rewards/margins": 13.848483085632324,
"rewards/rejected": -11.652273178100586,
"step": 610
},
{
"epoch": 0.8635097493036211,
"grad_norm": 0.0005970289348624647,
"learning_rate": 3.572423398328691e-05,
"logits/chosen": -50.802921295166016,
"logits/rejected": -51.33774948120117,
"logps/chosen": -198.7797393798828,
"logps/rejected": -179.11044311523438,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.640532970428467,
"rewards/margins": 14.48454475402832,
"rewards/rejected": -11.844011306762695,
"step": 620
},
{
"epoch": 0.8774373259052924,
"grad_norm": 0.0030954822432249784,
"learning_rate": 3.549210770659239e-05,
"logits/chosen": -51.285545349121094,
"logits/rejected": -59.092987060546875,
"logps/chosen": -203.8007049560547,
"logps/rejected": -172.9857177734375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 1.8215763568878174,
"rewards/margins": 12.862950325012207,
"rewards/rejected": -11.041373252868652,
"step": 630
},
{
"epoch": 0.8913649025069638,
"grad_norm": 0.000901729566976428,
"learning_rate": 3.525998142989786e-05,
"logits/chosen": -50.075538635253906,
"logits/rejected": -51.51918411254883,
"logps/chosen": -220.5761260986328,
"logps/rejected": -181.4318389892578,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 1.931749939918518,
"rewards/margins": 13.427146911621094,
"rewards/rejected": -11.495397567749023,
"step": 640
},
{
"epoch": 0.9052924791086351,
"grad_norm": 0.09048446267843246,
"learning_rate": 3.5027855153203345e-05,
"logits/chosen": -50.31797409057617,
"logits/rejected": -54.93279266357422,
"logps/chosen": -192.12985229492188,
"logps/rejected": -168.02911376953125,
"loss": 0.0004,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.672158718109131,
"rewards/margins": 12.830169677734375,
"rewards/rejected": -10.158011436462402,
"step": 650
},
{
"epoch": 0.9192200557103064,
"grad_norm": 0.0008625476621091366,
"learning_rate": 3.479572887650882e-05,
"logits/chosen": -49.658103942871094,
"logits/rejected": -49.33926010131836,
"logps/chosen": -185.3748779296875,
"logps/rejected": -168.5997772216797,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.0287516117095947,
"rewards/margins": 13.385884284973145,
"rewards/rejected": -10.357132911682129,
"step": 660
},
{
"epoch": 0.9331476323119777,
"grad_norm": 0.0020681144669651985,
"learning_rate": 3.45636025998143e-05,
"logits/chosen": -50.34550094604492,
"logits/rejected": -46.77113342285156,
"logps/chosen": -203.38619995117188,
"logps/rejected": -169.04295349121094,
"loss": 0.0002,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.0530314445495605,
"rewards/margins": 13.665138244628906,
"rewards/rejected": -10.612106323242188,
"step": 670
},
{
"epoch": 0.947075208913649,
"grad_norm": 0.0008213380351662636,
"learning_rate": 3.4331476323119775e-05,
"logits/chosen": -50.81815719604492,
"logits/rejected": -48.66960906982422,
"logps/chosen": -195.8907470703125,
"logps/rejected": -169.99732971191406,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.5576915740966797,
"rewards/margins": 13.460963249206543,
"rewards/rejected": -10.903271675109863,
"step": 680
},
{
"epoch": 0.9610027855153204,
"grad_norm": 0.0015723519027233124,
"learning_rate": 3.4099350046425257e-05,
"logits/chosen": -48.638885498046875,
"logits/rejected": -50.73641586303711,
"logps/chosen": -176.6822967529297,
"logps/rejected": -164.06044006347656,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.8135972023010254,
"rewards/margins": 12.596272468566895,
"rewards/rejected": -9.782674789428711,
"step": 690
},
{
"epoch": 0.9749303621169917,
"grad_norm": 0.00701997522264719,
"learning_rate": 3.386722376973073e-05,
"logits/chosen": -50.4714469909668,
"logits/rejected": -52.91802978515625,
"logps/chosen": -184.271728515625,
"logps/rejected": -162.01087951660156,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.934034824371338,
"rewards/margins": 12.961067199707031,
"rewards/rejected": -10.027031898498535,
"step": 700
},
{
"epoch": 0.9888579387186629,
"grad_norm": 0.003028564853593707,
"learning_rate": 3.363509749303621e-05,
"logits/chosen": -49.239967346191406,
"logits/rejected": -47.960716247558594,
"logps/chosen": -182.0518341064453,
"logps/rejected": -172.40980529785156,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.2808187007904053,
"rewards/margins": 12.81373119354248,
"rewards/rejected": -10.53291130065918,
"step": 710
},
{
"epoch": 1.0027855153203342,
"grad_norm": 0.0019847529474645853,
"learning_rate": 3.3402971216341694e-05,
"logits/chosen": -48.445213317871094,
"logits/rejected": -48.705787658691406,
"logps/chosen": -196.92776489257812,
"logps/rejected": -176.7248077392578,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.7315926551818848,
"rewards/margins": 13.8847017288208,
"rewards/rejected": -11.153108596801758,
"step": 720
},
{
"epoch": 1.0167130919220055,
"grad_norm": 0.00019169057486578822,
"learning_rate": 3.3170844939647175e-05,
"logits/chosen": -47.656700134277344,
"logits/rejected": -47.6513786315918,
"logps/chosen": -185.78167724609375,
"logps/rejected": -178.4099578857422,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.055850028991699,
"rewards/margins": 14.758455276489258,
"rewards/rejected": -11.702605247497559,
"step": 730
},
{
"epoch": 1.0306406685236769,
"grad_norm": 0.0018290742300450802,
"learning_rate": 3.293871866295265e-05,
"logits/chosen": -47.585262298583984,
"logits/rejected": -47.93593215942383,
"logps/chosen": -189.79299926757812,
"logps/rejected": -173.1365509033203,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.0570778846740723,
"rewards/margins": 14.200826644897461,
"rewards/rejected": -11.143750190734863,
"step": 740
},
{
"epoch": 1.0445682451253482,
"grad_norm": 0.002084003994241357,
"learning_rate": 3.270659238625813e-05,
"logits/chosen": -48.902366638183594,
"logits/rejected": -49.37635040283203,
"logps/chosen": -190.6774444580078,
"logps/rejected": -176.73709106445312,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.9810030460357666,
"rewards/margins": 13.916801452636719,
"rewards/rejected": -10.935799598693848,
"step": 750
},
{
"epoch": 1.0584958217270195,
"grad_norm": 0.00043452094541862607,
"learning_rate": 3.2474466109563606e-05,
"logits/chosen": -47.70875549316406,
"logits/rejected": -47.576881408691406,
"logps/chosen": -174.31796264648438,
"logps/rejected": -172.3538360595703,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.190167188644409,
"rewards/margins": 14.282261848449707,
"rewards/rejected": -11.092094421386719,
"step": 760
},
{
"epoch": 1.0724233983286908,
"grad_norm": 0.0001380470785079524,
"learning_rate": 3.224233983286909e-05,
"logits/chosen": -44.77172088623047,
"logits/rejected": -48.8241081237793,
"logps/chosen": -185.09288024902344,
"logps/rejected": -175.90911865234375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.2085177898406982,
"rewards/margins": 14.184524536132812,
"rewards/rejected": -10.976004600524902,
"step": 770
},
{
"epoch": 1.0863509749303621,
"grad_norm": 0.0007571239257231355,
"learning_rate": 3.201021355617456e-05,
"logits/chosen": -46.60997772216797,
"logits/rejected": -47.1086311340332,
"logps/chosen": -181.51124572753906,
"logps/rejected": -176.6173095703125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.862274646759033,
"rewards/margins": 14.07470989227295,
"rewards/rejected": -11.212434768676758,
"step": 780
},
{
"epoch": 1.1002785515320335,
"grad_norm": 0.0007815372664481401,
"learning_rate": 3.177808727948004e-05,
"logits/chosen": -46.209800720214844,
"logits/rejected": -49.99565505981445,
"logps/chosen": -189.17105102539062,
"logps/rejected": -175.38833618164062,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.5530970096588135,
"rewards/margins": 14.227849006652832,
"rewards/rejected": -11.674753189086914,
"step": 790
},
{
"epoch": 1.1142061281337048,
"grad_norm": 4.730727232526988e-05,
"learning_rate": 3.154596100278552e-05,
"logits/chosen": -45.40021514892578,
"logits/rejected": -46.26294708251953,
"logps/chosen": -192.1022491455078,
"logps/rejected": -183.72848510742188,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.7471506595611572,
"rewards/margins": 14.762069702148438,
"rewards/rejected": -12.014918327331543,
"step": 800
},
{
"epoch": 1.128133704735376,
"grad_norm": 0.0005188810173422098,
"learning_rate": 3.1313834726091e-05,
"logits/chosen": -47.36610412597656,
"logits/rejected": -46.83913040161133,
"logps/chosen": -181.6043701171875,
"logps/rejected": -183.43173217773438,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.78470778465271,
"rewards/margins": 15.225115776062012,
"rewards/rejected": -12.440409660339355,
"step": 810
},
{
"epoch": 1.1420612813370474,
"grad_norm": 0.003626539371907711,
"learning_rate": 3.1081708449396474e-05,
"logits/chosen": -47.204830169677734,
"logits/rejected": -49.52333450317383,
"logps/chosen": -180.70919799804688,
"logps/rejected": -169.68026733398438,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.0807857513427734,
"rewards/margins": 14.249502182006836,
"rewards/rejected": -11.168718338012695,
"step": 820
},
{
"epoch": 1.1559888579387188,
"grad_norm": 0.0012059325817972422,
"learning_rate": 3.0849582172701955e-05,
"logits/chosen": -46.76143264770508,
"logits/rejected": -48.97161102294922,
"logps/chosen": -174.12811279296875,
"logps/rejected": -175.88150024414062,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.8706345558166504,
"rewards/margins": 14.090449333190918,
"rewards/rejected": -11.219817161560059,
"step": 830
},
{
"epoch": 1.16991643454039,
"grad_norm": 0.0010171913309022784,
"learning_rate": 3.061745589600743e-05,
"logits/chosen": -48.58325958251953,
"logits/rejected": -46.89453125,
"logps/chosen": -173.7610626220703,
"logps/rejected": -183.8321533203125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.4698550701141357,
"rewards/margins": 14.651331901550293,
"rewards/rejected": -12.181478500366211,
"step": 840
},
{
"epoch": 1.1838440111420612,
"grad_norm": 0.0009448982309550047,
"learning_rate": 3.0385329619312908e-05,
"logits/chosen": -47.29355239868164,
"logits/rejected": -46.92559051513672,
"logps/chosen": -196.41555786132812,
"logps/rejected": -175.25381469726562,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.1618518829345703,
"rewards/margins": 14.048612594604492,
"rewards/rejected": -10.886759757995605,
"step": 850
},
{
"epoch": 1.1977715877437327,
"grad_norm": 0.0016251832712441683,
"learning_rate": 3.0153203342618386e-05,
"logits/chosen": -46.79810333251953,
"logits/rejected": -46.351219177246094,
"logps/chosen": -192.8514404296875,
"logps/rejected": -179.13809204101562,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.0230753421783447,
"rewards/margins": 14.267328262329102,
"rewards/rejected": -11.244253158569336,
"step": 860
},
{
"epoch": 1.2116991643454038,
"grad_norm": 0.0008583049057051539,
"learning_rate": 2.9921077065923864e-05,
"logits/chosen": -46.299705505371094,
"logits/rejected": -52.336578369140625,
"logps/chosen": -183.10208129882812,
"logps/rejected": -179.01170349121094,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.5486302375793457,
"rewards/margins": 13.563204765319824,
"rewards/rejected": -11.014575004577637,
"step": 870
},
{
"epoch": 1.2256267409470751,
"grad_norm": 0.002296986524015665,
"learning_rate": 2.9688950789229342e-05,
"logits/chosen": -47.59013366699219,
"logits/rejected": -44.00934600830078,
"logps/chosen": -205.0873565673828,
"logps/rejected": -178.86834716796875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.8372130393981934,
"rewards/margins": 13.294425964355469,
"rewards/rejected": -10.457212448120117,
"step": 880
},
{
"epoch": 1.2395543175487465,
"grad_norm": 0.000371147325495258,
"learning_rate": 2.945682451253482e-05,
"logits/chosen": -42.788970947265625,
"logits/rejected": -49.1223030090332,
"logps/chosen": -173.95599365234375,
"logps/rejected": -172.49954223632812,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.9040775299072266,
"rewards/margins": 15.029403686523438,
"rewards/rejected": -11.125325202941895,
"step": 890
},
{
"epoch": 1.2534818941504178,
"grad_norm": 0.0004314547113608569,
"learning_rate": 2.9224698235840298e-05,
"logits/chosen": -45.99776840209961,
"logits/rejected": -46.92440414428711,
"logps/chosen": -194.90078735351562,
"logps/rejected": -178.046875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.8200998306274414,
"rewards/margins": 14.157859802246094,
"rewards/rejected": -11.337759971618652,
"step": 900
},
{
"epoch": 1.267409470752089,
"grad_norm": 0.0007565075648017228,
"learning_rate": 2.8992571959145776e-05,
"logits/chosen": -47.412750244140625,
"logits/rejected": -47.732460021972656,
"logps/chosen": -182.77047729492188,
"logps/rejected": -174.72396850585938,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.9069650173187256,
"rewards/margins": 13.822225570678711,
"rewards/rejected": -10.915260314941406,
"step": 910
},
{
"epoch": 1.2813370473537604,
"grad_norm": 0.00014751723210792989,
"learning_rate": 2.8760445682451254e-05,
"logits/chosen": -45.15221405029297,
"logits/rejected": -45.39213180541992,
"logps/chosen": -180.3527069091797,
"logps/rejected": -173.14462280273438,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.407719135284424,
"rewards/margins": 14.697021484375,
"rewards/rejected": -11.289301872253418,
"step": 920
},
{
"epoch": 1.2952646239554317,
"grad_norm": 0.00017833193123806268,
"learning_rate": 2.852831940575673e-05,
"logits/chosen": -45.37639617919922,
"logits/rejected": -46.76648712158203,
"logps/chosen": -184.28834533691406,
"logps/rejected": -172.79425048828125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.130150556564331,
"rewards/margins": 14.1089506149292,
"rewards/rejected": -10.978799819946289,
"step": 930
},
{
"epoch": 1.309192200557103,
"grad_norm": 2.0110555851715617e-05,
"learning_rate": 2.829619312906221e-05,
"logits/chosen": -43.93904495239258,
"logits/rejected": -43.89077377319336,
"logps/chosen": -195.8922119140625,
"logps/rejected": -177.62008666992188,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.138190507888794,
"rewards/margins": 14.122373580932617,
"rewards/rejected": -10.984184265136719,
"step": 940
},
{
"epoch": 1.3231197771587744,
"grad_norm": 0.0016257427632808685,
"learning_rate": 2.8064066852367688e-05,
"logits/chosen": -44.54652404785156,
"logits/rejected": -43.146263122558594,
"logps/chosen": -189.8309783935547,
"logps/rejected": -182.90872192382812,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.7086493968963623,
"rewards/margins": 15.096916198730469,
"rewards/rejected": -12.388265609741211,
"step": 950
},
{
"epoch": 1.3370473537604457,
"grad_norm": 0.00040859784348867834,
"learning_rate": 2.7831940575673166e-05,
"logits/chosen": -44.68381881713867,
"logits/rejected": -46.662906646728516,
"logps/chosen": -190.71554565429688,
"logps/rejected": -173.70538330078125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.913283586502075,
"rewards/margins": 13.87846565246582,
"rewards/rejected": -10.965181350708008,
"step": 960
},
{
"epoch": 1.350974930362117,
"grad_norm": 0.0006592085701413453,
"learning_rate": 2.7599814298978644e-05,
"logits/chosen": -42.80352020263672,
"logits/rejected": -43.12960433959961,
"logps/chosen": -187.87155151367188,
"logps/rejected": -174.0194549560547,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.122223138809204,
"rewards/margins": 14.28703784942627,
"rewards/rejected": -11.164815902709961,
"step": 970
},
{
"epoch": 1.3649025069637883,
"grad_norm": 0.00013744817988481373,
"learning_rate": 2.736768802228412e-05,
"logits/chosen": -41.9451904296875,
"logits/rejected": -47.34199523925781,
"logps/chosen": -172.63613891601562,
"logps/rejected": -174.45248413085938,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.9825220108032227,
"rewards/margins": 14.628707885742188,
"rewards/rejected": -11.646185874938965,
"step": 980
},
{
"epoch": 1.3788300835654597,
"grad_norm": 0.00018170691328123212,
"learning_rate": 2.71355617455896e-05,
"logits/chosen": -44.152679443359375,
"logits/rejected": -43.70296096801758,
"logps/chosen": -189.96286010742188,
"logps/rejected": -182.75204467773438,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.9260449409484863,
"rewards/margins": 15.165403366088867,
"rewards/rejected": -12.239357948303223,
"step": 990
},
{
"epoch": 1.392757660167131,
"grad_norm": 0.00020125451555941254,
"learning_rate": 2.6903435468895084e-05,
"logits/chosen": -43.45044708251953,
"logits/rejected": -53.4655647277832,
"logps/chosen": -191.4513397216797,
"logps/rejected": -173.65176391601562,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.9874825477600098,
"rewards/margins": 13.630624771118164,
"rewards/rejected": -10.64314079284668,
"step": 1000
},
{
"epoch": 1.4066852367688023,
"grad_norm": 0.00031613241299055517,
"learning_rate": 2.6671309192200562e-05,
"logits/chosen": -45.708953857421875,
"logits/rejected": -36.509056091308594,
"logps/chosen": -175.71456909179688,
"logps/rejected": -179.5768280029297,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.7891390323638916,
"rewards/margins": 15.765680313110352,
"rewards/rejected": -12.976541519165039,
"step": 1010
},
{
"epoch": 1.4206128133704734,
"grad_norm": 0.0009248528513126075,
"learning_rate": 2.643918291550604e-05,
"logits/chosen": -45.35698318481445,
"logits/rejected": -44.74424362182617,
"logps/chosen": -188.96438598632812,
"logps/rejected": -184.33306884765625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.623995780944824,
"rewards/margins": 14.462275505065918,
"rewards/rejected": -11.83828067779541,
"step": 1020
},
{
"epoch": 1.434540389972145,
"grad_norm": 0.00033521506702527404,
"learning_rate": 2.620705663881152e-05,
"logits/chosen": -42.90143585205078,
"logits/rejected": -42.27935791015625,
"logps/chosen": -187.33126831054688,
"logps/rejected": -179.48333740234375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.8840413093566895,
"rewards/margins": 14.168217658996582,
"rewards/rejected": -11.28417682647705,
"step": 1030
},
{
"epoch": 1.448467966573816,
"grad_norm": 3.1652274628868327e-05,
"learning_rate": 2.5974930362116996e-05,
"logits/chosen": -42.856422424316406,
"logits/rejected": -46.94526290893555,
"logps/chosen": -180.2850341796875,
"logps/rejected": -180.52711486816406,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.3675103187561035,
"rewards/margins": 14.869485855102539,
"rewards/rejected": -11.501976013183594,
"step": 1040
},
{
"epoch": 1.4623955431754876,
"grad_norm": 0.0001855907030403614,
"learning_rate": 2.5742804085422474e-05,
"logits/chosen": -43.780208587646484,
"logits/rejected": -45.45014953613281,
"logps/chosen": -189.4515838623047,
"logps/rejected": -171.45359802246094,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.998511791229248,
"rewards/margins": 13.935274124145508,
"rewards/rejected": -10.936761856079102,
"step": 1050
},
{
"epoch": 1.4763231197771587,
"grad_norm": 0.0009718401124700904,
"learning_rate": 2.5510677808727952e-05,
"logits/chosen": -42.94011688232422,
"logits/rejected": -43.77968215942383,
"logps/chosen": -199.50701904296875,
"logps/rejected": -189.60020446777344,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.7382802963256836,
"rewards/margins": 15.149266242980957,
"rewards/rejected": -12.410985946655273,
"step": 1060
},
{
"epoch": 1.49025069637883,
"grad_norm": 0.0005517892423085868,
"learning_rate": 2.527855153203343e-05,
"logits/chosen": -41.704933166503906,
"logits/rejected": -42.4830322265625,
"logps/chosen": -188.4182891845703,
"logps/rejected": -180.1853790283203,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.676875352859497,
"rewards/margins": 15.806604385375977,
"rewards/rejected": -12.129728317260742,
"step": 1070
},
{
"epoch": 1.5041782729805013,
"grad_norm": 0.0015563963679596782,
"learning_rate": 2.504642525533891e-05,
"logits/chosen": -43.79706954956055,
"logits/rejected": -45.16236114501953,
"logps/chosen": -183.44960021972656,
"logps/rejected": -181.75918579101562,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.9543235301971436,
"rewards/margins": 14.605280876159668,
"rewards/rejected": -11.650958061218262,
"step": 1080
},
{
"epoch": 1.5181058495821727,
"grad_norm": 0.0001682774309301749,
"learning_rate": 2.4814298978644386e-05,
"logits/chosen": -43.524757385253906,
"logits/rejected": -47.19926071166992,
"logps/chosen": -179.42062377929688,
"logps/rejected": -173.64984130859375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.019610643386841,
"rewards/margins": 14.090364456176758,
"rewards/rejected": -11.070755004882812,
"step": 1090
},
{
"epoch": 1.532033426183844,
"grad_norm": 0.0002563179295975715,
"learning_rate": 2.4582172701949864e-05,
"logits/chosen": -41.658203125,
"logits/rejected": -45.531280517578125,
"logps/chosen": -168.8417205810547,
"logps/rejected": -179.17169189453125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.318143844604492,
"rewards/margins": 14.908279418945312,
"rewards/rejected": -11.59013557434082,
"step": 1100
},
{
"epoch": 1.5459610027855153,
"grad_norm": 0.0005912501364946365,
"learning_rate": 2.4350046425255342e-05,
"logits/chosen": -42.020301818847656,
"logits/rejected": -41.976051330566406,
"logps/chosen": -194.45645141601562,
"logps/rejected": -180.68141174316406,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.85841703414917,
"rewards/margins": 14.995101928710938,
"rewards/rejected": -12.136682510375977,
"step": 1110
},
{
"epoch": 1.5598885793871866,
"grad_norm": 0.002638684120029211,
"learning_rate": 2.411792014856082e-05,
"logits/chosen": -41.923728942871094,
"logits/rejected": -46.06597137451172,
"logps/chosen": -198.2957000732422,
"logps/rejected": -185.3656768798828,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.1566615104675293,
"rewards/margins": 14.20202922821045,
"rewards/rejected": -11.045369148254395,
"step": 1120
},
{
"epoch": 1.573816155988858,
"grad_norm": 2.0041943571413867e-05,
"learning_rate": 2.3885793871866298e-05,
"logits/chosen": -44.468955993652344,
"logits/rejected": -42.48888397216797,
"logps/chosen": -184.76351928710938,
"logps/rejected": -182.19944763183594,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.2618041038513184,
"rewards/margins": 15.966888427734375,
"rewards/rejected": -12.705083847045898,
"step": 1130
},
{
"epoch": 1.5877437325905293,
"grad_norm": 1.645497468416579e-05,
"learning_rate": 2.3653667595171773e-05,
"logits/chosen": -42.27872848510742,
"logits/rejected": -43.35807418823242,
"logps/chosen": -205.8543243408203,
"logps/rejected": -180.86024475097656,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.8403396606445312,
"rewards/margins": 15.042218208312988,
"rewards/rejected": -12.201878547668457,
"step": 1140
},
{
"epoch": 1.6016713091922006,
"grad_norm": 0.0003787026507779956,
"learning_rate": 2.342154131847725e-05,
"logits/chosen": -42.156349182128906,
"logits/rejected": -37.70032501220703,
"logps/chosen": -194.82247924804688,
"logps/rejected": -176.8505096435547,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.513833999633789,
"rewards/margins": 14.492878913879395,
"rewards/rejected": -10.979044914245605,
"step": 1150
},
{
"epoch": 1.615598885793872,
"grad_norm": 7.55380024202168e-05,
"learning_rate": 2.318941504178273e-05,
"logits/chosen": -43.539344787597656,
"logits/rejected": -41.69083023071289,
"logps/chosen": -187.00527954101562,
"logps/rejected": -177.04592895507812,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.1924500465393066,
"rewards/margins": 14.285606384277344,
"rewards/rejected": -11.093156814575195,
"step": 1160
},
{
"epoch": 1.6295264623955432,
"grad_norm": 0.0002465043216943741,
"learning_rate": 2.2957288765088207e-05,
"logits/chosen": -42.10594177246094,
"logits/rejected": -41.667667388916016,
"logps/chosen": -190.01614379882812,
"logps/rejected": -181.31919860839844,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.0133557319641113,
"rewards/margins": 14.723991394042969,
"rewards/rejected": -11.710634231567383,
"step": 1170
},
{
"epoch": 1.6434540389972145,
"grad_norm": 3.898440991179086e-05,
"learning_rate": 2.2725162488393685e-05,
"logits/chosen": -42.041419982910156,
"logits/rejected": -48.19127655029297,
"logps/chosen": -188.48135375976562,
"logps/rejected": -179.7986602783203,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.3684513568878174,
"rewards/margins": 15.300027847290039,
"rewards/rejected": -11.931575775146484,
"step": 1180
},
{
"epoch": 1.6573816155988856,
"grad_norm": 0.00035718828439712524,
"learning_rate": 2.2493036211699163e-05,
"logits/chosen": -42.593849182128906,
"logits/rejected": -49.53551483154297,
"logps/chosen": -189.70465087890625,
"logps/rejected": -187.06527709960938,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.751575469970703,
"rewards/margins": 14.097253799438477,
"rewards/rejected": -11.345675468444824,
"step": 1190
},
{
"epoch": 1.6713091922005572,
"grad_norm": 0.00047763698967173696,
"learning_rate": 2.226090993500464e-05,
"logits/chosen": -42.90016555786133,
"logits/rejected": -48.32063293457031,
"logps/chosen": -173.6857452392578,
"logps/rejected": -173.83023071289062,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.3109641075134277,
"rewards/margins": 13.790491104125977,
"rewards/rejected": -10.47952651977539,
"step": 1200
},
{
"epoch": 1.6852367688022283,
"grad_norm": 0.000792037055362016,
"learning_rate": 2.2028783658310122e-05,
"logits/chosen": -39.64678192138672,
"logits/rejected": -48.17890930175781,
"logps/chosen": -192.51730346679688,
"logps/rejected": -169.37339782714844,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.397125244140625,
"rewards/margins": 13.744363784790039,
"rewards/rejected": -10.347238540649414,
"step": 1210
},
{
"epoch": 1.6991643454038998,
"grad_norm": 0.0005765881505794823,
"learning_rate": 2.17966573816156e-05,
"logits/chosen": -38.50248336791992,
"logits/rejected": -39.00818634033203,
"logps/chosen": -195.1656494140625,
"logps/rejected": -186.5626678466797,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.825869083404541,
"rewards/margins": 14.93237590789795,
"rewards/rejected": -12.10650634765625,
"step": 1220
},
{
"epoch": 1.713091922005571,
"grad_norm": 0.0004956713528372347,
"learning_rate": 2.1564531104921078e-05,
"logits/chosen": -42.719886779785156,
"logits/rejected": -43.95571517944336,
"logps/chosen": -181.20785522460938,
"logps/rejected": -173.41934204101562,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.436485767364502,
"rewards/margins": 14.404312133789062,
"rewards/rejected": -10.967824935913086,
"step": 1230
},
{
"epoch": 1.7270194986072425,
"grad_norm": 0.004964966792613268,
"learning_rate": 2.1332404828226556e-05,
"logits/chosen": -40.15790939331055,
"logits/rejected": -46.36140441894531,
"logps/chosen": -194.02664184570312,
"logps/rejected": -174.8671112060547,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.7149300575256348,
"rewards/margins": 13.3515043258667,
"rewards/rejected": -10.636574745178223,
"step": 1240
},
{
"epoch": 1.7409470752089136,
"grad_norm": 0.0004779207520186901,
"learning_rate": 2.1100278551532034e-05,
"logits/chosen": -38.4859733581543,
"logits/rejected": -43.74262237548828,
"logps/chosen": -169.06556701660156,
"logps/rejected": -180.42384338378906,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.352203369140625,
"rewards/margins": 14.585695266723633,
"rewards/rejected": -11.233491897583008,
"step": 1250
},
{
"epoch": 1.754874651810585,
"grad_norm": 0.0005939751281403005,
"learning_rate": 2.0868152274837512e-05,
"logits/chosen": -40.667049407958984,
"logits/rejected": -40.19790267944336,
"logps/chosen": -196.654052734375,
"logps/rejected": -183.01376342773438,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.437725305557251,
"rewards/margins": 13.927282333374023,
"rewards/rejected": -11.489557266235352,
"step": 1260
},
{
"epoch": 1.7688022284122562,
"grad_norm": 0.0006439912249334157,
"learning_rate": 2.063602599814299e-05,
"logits/chosen": -41.551029205322266,
"logits/rejected": -46.43274688720703,
"logps/chosen": -192.17355346679688,
"logps/rejected": -172.40625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.9757094383239746,
"rewards/margins": 13.070402145385742,
"rewards/rejected": -10.09469223022461,
"step": 1270
},
{
"epoch": 1.7827298050139275,
"grad_norm": 0.001160840387456119,
"learning_rate": 2.0403899721448468e-05,
"logits/chosen": -42.423248291015625,
"logits/rejected": -43.39242172241211,
"logps/chosen": -188.78314208984375,
"logps/rejected": -178.84017944335938,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.4687724113464355,
"rewards/margins": 13.714757919311523,
"rewards/rejected": -11.245985984802246,
"step": 1280
},
{
"epoch": 1.7966573816155988,
"grad_norm": 0.0008970465278252959,
"learning_rate": 2.0171773444753946e-05,
"logits/chosen": -39.771629333496094,
"logits/rejected": -45.55982208251953,
"logps/chosen": -175.4274444580078,
"logps/rejected": -178.88775634765625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.9034018516540527,
"rewards/margins": 14.535983085632324,
"rewards/rejected": -11.632580757141113,
"step": 1290
},
{
"epoch": 1.8105849582172702,
"grad_norm": 0.0006431234069168568,
"learning_rate": 1.9939647168059424e-05,
"logits/chosen": -39.90253448486328,
"logits/rejected": -44.82994842529297,
"logps/chosen": -189.65737915039062,
"logps/rejected": -178.84754943847656,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.19281005859375,
"rewards/margins": 14.324234008789062,
"rewards/rejected": -11.131423950195312,
"step": 1300
},
{
"epoch": 1.8245125348189415,
"grad_norm": 0.0008485277649015188,
"learning_rate": 1.9707520891364902e-05,
"logits/chosen": -40.094932556152344,
"logits/rejected": -45.365562438964844,
"logps/chosen": -182.76187133789062,
"logps/rejected": -174.17022705078125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.948390245437622,
"rewards/margins": 13.181282043457031,
"rewards/rejected": -10.232892990112305,
"step": 1310
},
{
"epoch": 1.8384401114206128,
"grad_norm": 0.0004803981864824891,
"learning_rate": 1.947539461467038e-05,
"logits/chosen": -38.52114486694336,
"logits/rejected": -37.8065299987793,
"logps/chosen": -188.72463989257812,
"logps/rejected": -189.0813446044922,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.8810222148895264,
"rewards/margins": 15.528773307800293,
"rewards/rejected": -12.647750854492188,
"step": 1320
},
{
"epoch": 1.8523676880222841,
"grad_norm": 0.0011859548976644874,
"learning_rate": 1.9243268337975858e-05,
"logits/chosen": -39.07917785644531,
"logits/rejected": -42.37941360473633,
"logps/chosen": -177.0192108154297,
"logps/rejected": -176.3035125732422,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.3045661449432373,
"rewards/margins": 14.152140617370605,
"rewards/rejected": -10.847575187683105,
"step": 1330
},
{
"epoch": 1.8662952646239555,
"grad_norm": 0.00019842004985548556,
"learning_rate": 1.9011142061281336e-05,
"logits/chosen": -38.62647247314453,
"logits/rejected": -41.89341735839844,
"logps/chosen": -195.18069458007812,
"logps/rejected": -179.20639038085938,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.8785204887390137,
"rewards/margins": 13.824996948242188,
"rewards/rejected": -10.946475982666016,
"step": 1340
},
{
"epoch": 1.8802228412256268,
"grad_norm": 2.9733255360042676e-05,
"learning_rate": 1.8779015784586814e-05,
"logits/chosen": -39.85136795043945,
"logits/rejected": -37.52424240112305,
"logps/chosen": -168.53857421875,
"logps/rejected": -179.1663360595703,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.0412697792053223,
"rewards/margins": 15.687167167663574,
"rewards/rejected": -12.645896911621094,
"step": 1350
},
{
"epoch": 1.894150417827298,
"grad_norm": 0.0005429552984423935,
"learning_rate": 1.8546889507892295e-05,
"logits/chosen": -38.35095977783203,
"logits/rejected": -37.555809020996094,
"logps/chosen": -191.65780639648438,
"logps/rejected": -187.13223266601562,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.386960983276367,
"rewards/margins": 15.752670288085938,
"rewards/rejected": -12.365708351135254,
"step": 1360
},
{
"epoch": 1.9080779944289694,
"grad_norm": 0.0004797822330147028,
"learning_rate": 1.8314763231197773e-05,
"logits/chosen": -40.002098083496094,
"logits/rejected": -37.707252502441406,
"logps/chosen": -198.4432373046875,
"logps/rejected": -179.8600616455078,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.40791654586792,
"rewards/margins": 14.185983657836914,
"rewards/rejected": -11.778066635131836,
"step": 1370
},
{
"epoch": 1.9220055710306405,
"grad_norm": 0.0002558935957495123,
"learning_rate": 1.808263695450325e-05,
"logits/chosen": -40.22335433959961,
"logits/rejected": -39.680335998535156,
"logps/chosen": -178.06088256835938,
"logps/rejected": -174.2890167236328,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.06481671333313,
"rewards/margins": 14.587986946105957,
"rewards/rejected": -11.523172378540039,
"step": 1380
},
{
"epoch": 1.935933147632312,
"grad_norm": 0.022678282111883163,
"learning_rate": 1.785051067780873e-05,
"logits/chosen": -37.662994384765625,
"logits/rejected": -42.678794860839844,
"logps/chosen": -174.59817504882812,
"logps/rejected": -172.0020751953125,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.9920780658721924,
"rewards/margins": 13.535223007202148,
"rewards/rejected": -10.543145179748535,
"step": 1390
},
{
"epoch": 1.9498607242339832,
"grad_norm": 0.002099443692713976,
"learning_rate": 1.7618384401114207e-05,
"logits/chosen": -40.57250213623047,
"logits/rejected": -43.25788497924805,
"logps/chosen": -188.49884033203125,
"logps/rejected": -175.46717834472656,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.0427377223968506,
"rewards/margins": 14.629735946655273,
"rewards/rejected": -11.586997985839844,
"step": 1400
},
{
"epoch": 1.9637883008356547,
"grad_norm": 0.0002445300342515111,
"learning_rate": 1.7386258124419685e-05,
"logits/chosen": -38.27973556518555,
"logits/rejected": -41.077720642089844,
"logps/chosen": -189.12991333007812,
"logps/rejected": -183.447998046875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.450929641723633,
"rewards/margins": 15.17542552947998,
"rewards/rejected": -11.72449779510498,
"step": 1410
},
{
"epoch": 1.9777158774373258,
"grad_norm": 4.384323619888164e-05,
"learning_rate": 1.7154131847725163e-05,
"logits/chosen": -37.973472595214844,
"logits/rejected": -41.2947998046875,
"logps/chosen": -170.81488037109375,
"logps/rejected": -187.4012908935547,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.1459457874298096,
"rewards/margins": 15.257524490356445,
"rewards/rejected": -12.111578941345215,
"step": 1420
},
{
"epoch": 1.9916434540389973,
"grad_norm": 0.0007763483445160091,
"learning_rate": 1.692200557103064e-05,
"logits/chosen": -37.70799255371094,
"logits/rejected": -39.826412200927734,
"logps/chosen": -190.74960327148438,
"logps/rejected": -175.0165252685547,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.826359987258911,
"rewards/margins": 14.098872184753418,
"rewards/rejected": -11.27251148223877,
"step": 1430
},
{
"epoch": 2.0055710306406684,
"grad_norm": 6.922234024386853e-05,
"learning_rate": 1.668987929433612e-05,
"logits/chosen": -36.95861053466797,
"logits/rejected": -40.93572998046875,
"logps/chosen": -195.0906524658203,
"logps/rejected": -184.3977508544922,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.468242883682251,
"rewards/margins": 14.012043952941895,
"rewards/rejected": -11.543802261352539,
"step": 1440
},
{
"epoch": 2.01949860724234,
"grad_norm": 0.0008899418171495199,
"learning_rate": 1.6457753017641597e-05,
"logits/chosen": -37.98674774169922,
"logits/rejected": -37.987403869628906,
"logps/chosen": -181.60769653320312,
"logps/rejected": -180.72677612304688,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.1309444904327393,
"rewards/margins": 15.349719047546387,
"rewards/rejected": -12.218774795532227,
"step": 1450
},
{
"epoch": 2.033426183844011,
"grad_norm": 0.0009419086272828281,
"learning_rate": 1.6225626740947075e-05,
"logits/chosen": -37.715850830078125,
"logits/rejected": -41.038299560546875,
"logps/chosen": -188.2119598388672,
"logps/rejected": -174.30699157714844,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.8673110008239746,
"rewards/margins": 14.030998229980469,
"rewards/rejected": -11.163687705993652,
"step": 1460
},
{
"epoch": 2.0473537604456826,
"grad_norm": 0.00017140038835350424,
"learning_rate": 1.5993500464252553e-05,
"logits/chosen": -36.50278091430664,
"logits/rejected": -40.35831832885742,
"logps/chosen": -189.39010620117188,
"logps/rejected": -179.73275756835938,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.1329684257507324,
"rewards/margins": 14.958372116088867,
"rewards/rejected": -11.825403213500977,
"step": 1470
},
{
"epoch": 2.0612813370473537,
"grad_norm": 2.8825706976931542e-05,
"learning_rate": 1.576137418755803e-05,
"logits/chosen": -37.138877868652344,
"logits/rejected": -35.2958869934082,
"logps/chosen": -196.31619262695312,
"logps/rejected": -182.4630584716797,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.744528293609619,
"rewards/margins": 14.846966743469238,
"rewards/rejected": -12.102435111999512,
"step": 1480
},
{
"epoch": 2.0752089136490253,
"grad_norm": 0.00021149200620129704,
"learning_rate": 1.552924791086351e-05,
"logits/chosen": -39.6096305847168,
"logits/rejected": -46.948265075683594,
"logps/chosen": -183.4660186767578,
"logps/rejected": -176.20413208007812,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.1110737323760986,
"rewards/margins": 14.174654006958008,
"rewards/rejected": -11.063581466674805,
"step": 1490
},
{
"epoch": 2.0891364902506964,
"grad_norm": 0.0004168855957686901,
"learning_rate": 1.529712163416899e-05,
"logits/chosen": -33.97929763793945,
"logits/rejected": -37.035865783691406,
"logps/chosen": -179.11045837402344,
"logps/rejected": -172.54037475585938,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.1583425998687744,
"rewards/margins": 14.817370414733887,
"rewards/rejected": -11.659029006958008,
"step": 1500
},
{
"epoch": 2.103064066852368,
"grad_norm": 4.1843464714474976e-05,
"learning_rate": 1.5064995357474469e-05,
"logits/chosen": -36.117828369140625,
"logits/rejected": -32.342254638671875,
"logps/chosen": -199.46429443359375,
"logps/rejected": -186.94625854492188,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.9351420402526855,
"rewards/margins": 14.709848403930664,
"rewards/rejected": -11.774707794189453,
"step": 1510
},
{
"epoch": 2.116991643454039,
"grad_norm": 0.0004989901790395379,
"learning_rate": 1.4832869080779947e-05,
"logits/chosen": -35.539146423339844,
"logits/rejected": -33.397926330566406,
"logps/chosen": -190.5336456298828,
"logps/rejected": -182.02114868164062,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.323108196258545,
"rewards/margins": 15.390007019042969,
"rewards/rejected": -12.066898345947266,
"step": 1520
},
{
"epoch": 2.13091922005571,
"grad_norm": 0.0010163492988795042,
"learning_rate": 1.4600742804085425e-05,
"logits/chosen": -36.64421463012695,
"logits/rejected": -37.93674087524414,
"logps/chosen": -198.0296630859375,
"logps/rejected": -179.7415008544922,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.915405750274658,
"rewards/margins": 14.40515422821045,
"rewards/rejected": -11.489748001098633,
"step": 1530
},
{
"epoch": 2.1448467966573816,
"grad_norm": 0.00033078828710131347,
"learning_rate": 1.4368616527390903e-05,
"logits/chosen": -37.70105743408203,
"logits/rejected": -36.71582794189453,
"logps/chosen": -199.81381225585938,
"logps/rejected": -185.13119506835938,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.8086984157562256,
"rewards/margins": 15.142376899719238,
"rewards/rejected": -12.33367919921875,
"step": 1540
},
{
"epoch": 2.1587743732590527,
"grad_norm": 0.0038093943148851395,
"learning_rate": 1.413649025069638e-05,
"logits/chosen": -36.39249038696289,
"logits/rejected": -38.92975616455078,
"logps/chosen": -196.83294677734375,
"logps/rejected": -175.97525024414062,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.1542813777923584,
"rewards/margins": 14.027664184570312,
"rewards/rejected": -10.873384475708008,
"step": 1550
},
{
"epoch": 2.1727019498607243,
"grad_norm": 0.00011094040382886305,
"learning_rate": 1.3904363974001859e-05,
"logits/chosen": -36.158992767333984,
"logits/rejected": -44.41789627075195,
"logps/chosen": -187.1454620361328,
"logps/rejected": -183.45184326171875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.974985361099243,
"rewards/margins": 14.125747680664062,
"rewards/rejected": -11.150762557983398,
"step": 1560
},
{
"epoch": 2.1866295264623954,
"grad_norm": 0.003225723747164011,
"learning_rate": 1.3672237697307335e-05,
"logits/chosen": -35.47116470336914,
"logits/rejected": -43.47711944580078,
"logps/chosen": -191.4207305908203,
"logps/rejected": -183.61276245117188,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.8648734092712402,
"rewards/margins": 14.461938858032227,
"rewards/rejected": -11.597065925598145,
"step": 1570
},
{
"epoch": 2.200557103064067,
"grad_norm": 0.00013535360631067306,
"learning_rate": 1.3440111420612813e-05,
"logits/chosen": -35.062652587890625,
"logits/rejected": -37.315582275390625,
"logps/chosen": -180.5184783935547,
"logps/rejected": -180.47647094726562,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.1524276733398438,
"rewards/margins": 14.701214790344238,
"rewards/rejected": -11.548785209655762,
"step": 1580
},
{
"epoch": 2.214484679665738,
"grad_norm": 5.084893018647563e-06,
"learning_rate": 1.3207985143918291e-05,
"logits/chosen": -35.936790466308594,
"logits/rejected": -36.74972152709961,
"logps/chosen": -181.1525421142578,
"logps/rejected": -181.84603881835938,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.186203718185425,
"rewards/margins": 15.35499095916748,
"rewards/rejected": -12.168787002563477,
"step": 1590
},
{
"epoch": 2.2284122562674096,
"grad_norm": 0.0004160265962127596,
"learning_rate": 1.2975858867223769e-05,
"logits/chosen": -37.140037536621094,
"logits/rejected": -42.357078552246094,
"logps/chosen": -187.4177703857422,
"logps/rejected": -178.51724243164062,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.9173107147216797,
"rewards/margins": 13.878204345703125,
"rewards/rejected": -10.960893630981445,
"step": 1600
},
{
"epoch": 2.2423398328690807,
"grad_norm": 0.00011604127212194726,
"learning_rate": 1.2743732590529247e-05,
"logits/chosen": -34.818607330322266,
"logits/rejected": -37.724525451660156,
"logps/chosen": -183.49917602539062,
"logps/rejected": -178.86361694335938,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.8125436305999756,
"rewards/margins": 14.389010429382324,
"rewards/rejected": -11.576468467712402,
"step": 1610
},
{
"epoch": 2.256267409470752,
"grad_norm": 0.0017907076980918646,
"learning_rate": 1.2511606313834725e-05,
"logits/chosen": -37.98354721069336,
"logits/rejected": -49.680633544921875,
"logps/chosen": -182.12060546875,
"logps/rejected": -176.7200927734375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.886204481124878,
"rewards/margins": 13.741534233093262,
"rewards/rejected": -10.855329513549805,
"step": 1620
},
{
"epoch": 2.2701949860724233,
"grad_norm": 0.0012422021245583892,
"learning_rate": 1.2279480037140205e-05,
"logits/chosen": -36.824703216552734,
"logits/rejected": -36.0146369934082,
"logps/chosen": -207.47109985351562,
"logps/rejected": -192.5692596435547,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.312756061553955,
"rewards/margins": 14.709668159484863,
"rewards/rejected": -12.396913528442383,
"step": 1630
},
{
"epoch": 2.284122562674095,
"grad_norm": 0.00051548529881984,
"learning_rate": 1.2047353760445683e-05,
"logits/chosen": -37.36172103881836,
"logits/rejected": -35.01740264892578,
"logps/chosen": -201.63711547851562,
"logps/rejected": -179.27565002441406,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.687352180480957,
"rewards/margins": 14.42353630065918,
"rewards/rejected": -11.736186027526855,
"step": 1640
},
{
"epoch": 2.298050139275766,
"grad_norm": 0.00029921080567874014,
"learning_rate": 1.181522748375116e-05,
"logits/chosen": -38.72724533081055,
"logits/rejected": -41.40291213989258,
"logps/chosen": -176.58592224121094,
"logps/rejected": -186.08123779296875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.005880832672119,
"rewards/margins": 14.604331970214844,
"rewards/rejected": -11.598451614379883,
"step": 1650
},
{
"epoch": 2.3119777158774375,
"grad_norm": 0.00012926581257488579,
"learning_rate": 1.1583101207056638e-05,
"logits/chosen": -35.43940734863281,
"logits/rejected": -36.595970153808594,
"logps/chosen": -187.47674560546875,
"logps/rejected": -182.973876953125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.200408458709717,
"rewards/margins": 14.481452941894531,
"rewards/rejected": -11.281042098999023,
"step": 1660
},
{
"epoch": 2.3259052924791086,
"grad_norm": 0.0009956905851140618,
"learning_rate": 1.1350974930362116e-05,
"logits/chosen": -33.24986267089844,
"logits/rejected": -35.32088088989258,
"logps/chosen": -190.22396850585938,
"logps/rejected": -186.2030029296875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.875269889831543,
"rewards/margins": 15.12162971496582,
"rewards/rejected": -12.246358871459961,
"step": 1670
},
{
"epoch": 2.33983286908078,
"grad_norm": 0.00024119042791426182,
"learning_rate": 1.1118848653667596e-05,
"logits/chosen": -35.24303436279297,
"logits/rejected": -37.33085632324219,
"logps/chosen": -184.21661376953125,
"logps/rejected": -183.30850219726562,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.1757493019104004,
"rewards/margins": 15.013044357299805,
"rewards/rejected": -11.837295532226562,
"step": 1680
},
{
"epoch": 2.3537604456824512,
"grad_norm": 0.0016317879781126976,
"learning_rate": 1.0886722376973074e-05,
"logits/chosen": -36.30957794189453,
"logits/rejected": -43.52277755737305,
"logps/chosen": -192.4365234375,
"logps/rejected": -186.6822052001953,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.168163776397705,
"rewards/margins": 14.91504955291748,
"rewards/rejected": -11.746885299682617,
"step": 1690
},
{
"epoch": 2.3676880222841223,
"grad_norm": 0.00019028224050998688,
"learning_rate": 1.0654596100278552e-05,
"logits/chosen": -35.48088073730469,
"logits/rejected": -40.22465133666992,
"logps/chosen": -178.8065948486328,
"logps/rejected": -177.88638305664062,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.1263413429260254,
"rewards/margins": 14.86894416809082,
"rewards/rejected": -11.742603302001953,
"step": 1700
},
{
"epoch": 2.381615598885794,
"grad_norm": 0.00013990727893542498,
"learning_rate": 1.042246982358403e-05,
"logits/chosen": -35.26982879638672,
"logits/rejected": -40.66838836669922,
"logps/chosen": -181.98269653320312,
"logps/rejected": -180.34336853027344,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.132636547088623,
"rewards/margins": 15.100153923034668,
"rewards/rejected": -11.96751594543457,
"step": 1710
},
{
"epoch": 2.3955431754874654,
"grad_norm": 0.00010347921488573775,
"learning_rate": 1.0190343546889508e-05,
"logits/chosen": -35.530635833740234,
"logits/rejected": -37.326942443847656,
"logps/chosen": -174.54537963867188,
"logps/rejected": -173.74192810058594,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.0651495456695557,
"rewards/margins": 14.588651657104492,
"rewards/rejected": -11.5235013961792,
"step": 1720
},
{
"epoch": 2.4094707520891365,
"grad_norm": 0.0007370264502242208,
"learning_rate": 9.958217270194986e-06,
"logits/chosen": -33.94354248046875,
"logits/rejected": -37.71461486816406,
"logps/chosen": -182.42373657226562,
"logps/rejected": -184.89016723632812,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.007021188735962,
"rewards/margins": 14.52685546875,
"rewards/rejected": -11.519835472106934,
"step": 1730
},
{
"epoch": 2.4233983286908076,
"grad_norm": 0.0005266707739792764,
"learning_rate": 9.726090993500464e-06,
"logits/chosen": -35.36183547973633,
"logits/rejected": -42.900447845458984,
"logps/chosen": -197.20449829101562,
"logps/rejected": -183.08486938476562,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.7276971340179443,
"rewards/margins": 14.261263847351074,
"rewards/rejected": -11.53356647491455,
"step": 1740
},
{
"epoch": 2.437325905292479,
"grad_norm": 0.0010256224777549505,
"learning_rate": 9.493964716805944e-06,
"logits/chosen": -32.54072570800781,
"logits/rejected": -40.63361358642578,
"logps/chosen": -182.46578979492188,
"logps/rejected": -180.01171875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.2156386375427246,
"rewards/margins": 14.604194641113281,
"rewards/rejected": -11.388555526733398,
"step": 1750
},
{
"epoch": 2.4512534818941503,
"grad_norm": 0.0003196709440089762,
"learning_rate": 9.261838440111422e-06,
"logits/chosen": -36.42176818847656,
"logits/rejected": -40.110435485839844,
"logps/chosen": -172.70303344726562,
"logps/rejected": -183.40029907226562,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.9896297454833984,
"rewards/margins": 14.956689834594727,
"rewards/rejected": -11.967057228088379,
"step": 1760
},
{
"epoch": 2.465181058495822,
"grad_norm": 0.0006114134448580444,
"learning_rate": 9.0297121634169e-06,
"logits/chosen": -34.053199768066406,
"logits/rejected": -40.59447479248047,
"logps/chosen": -187.02113342285156,
"logps/rejected": -177.52719116210938,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.881593942642212,
"rewards/margins": 14.561999320983887,
"rewards/rejected": -11.680405616760254,
"step": 1770
},
{
"epoch": 2.479108635097493,
"grad_norm": 0.006414721254259348,
"learning_rate": 8.797585886722378e-06,
"logits/chosen": -35.063026428222656,
"logits/rejected": -40.642601013183594,
"logps/chosen": -180.2222442626953,
"logps/rejected": -177.395751953125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.213207721710205,
"rewards/margins": 14.45429801940918,
"rewards/rejected": -11.241090774536133,
"step": 1780
},
{
"epoch": 2.4930362116991645,
"grad_norm": 0.00032282338361255825,
"learning_rate": 8.565459610027856e-06,
"logits/chosen": -35.19350051879883,
"logits/rejected": -44.75577926635742,
"logps/chosen": -182.2408447265625,
"logps/rejected": -174.94203186035156,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.2195639610290527,
"rewards/margins": 14.126502990722656,
"rewards/rejected": -10.906938552856445,
"step": 1790
},
{
"epoch": 2.5069637883008355,
"grad_norm": 0.00048296854947693646,
"learning_rate": 8.333333333333334e-06,
"logits/chosen": -37.459720611572266,
"logits/rejected": -34.258609771728516,
"logps/chosen": -200.30984497070312,
"logps/rejected": -186.15658569335938,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.408186912536621,
"rewards/margins": 14.674453735351562,
"rewards/rejected": -12.266264915466309,
"step": 1800
},
{
"epoch": 2.520891364902507,
"grad_norm": 0.000831536075565964,
"learning_rate": 8.101207056638812e-06,
"logits/chosen": -34.92743682861328,
"logits/rejected": -37.31873321533203,
"logps/chosen": -205.9928741455078,
"logps/rejected": -176.43490600585938,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.477787971496582,
"rewards/margins": 14.188326835632324,
"rewards/rejected": -11.710537910461426,
"step": 1810
},
{
"epoch": 2.534818941504178,
"grad_norm": 4.310339136281982e-05,
"learning_rate": 7.869080779944291e-06,
"logits/chosen": -33.059104919433594,
"logits/rejected": -38.825035095214844,
"logps/chosen": -192.43711853027344,
"logps/rejected": -172.77899169921875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.044785261154175,
"rewards/margins": 14.586946487426758,
"rewards/rejected": -11.54216194152832,
"step": 1820
},
{
"epoch": 2.5487465181058497,
"grad_norm": 0.0003261861565988511,
"learning_rate": 7.63695450324977e-06,
"logits/chosen": -32.958534240722656,
"logits/rejected": -36.15265655517578,
"logps/chosen": -184.374755859375,
"logps/rejected": -186.88174438476562,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.408311367034912,
"rewards/margins": 14.616594314575195,
"rewards/rejected": -12.208281517028809,
"step": 1830
},
{
"epoch": 2.562674094707521,
"grad_norm": 0.0008768205298110843,
"learning_rate": 7.4048282265552465e-06,
"logits/chosen": -34.02647018432617,
"logits/rejected": -40.55278015136719,
"logps/chosen": -180.45309448242188,
"logps/rejected": -187.4500274658203,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.939589262008667,
"rewards/margins": 15.011263847351074,
"rewards/rejected": -12.071674346923828,
"step": 1840
},
{
"epoch": 2.5766016713091924,
"grad_norm": 0.00012635016173589975,
"learning_rate": 7.1727019498607245e-06,
"logits/chosen": -35.22064971923828,
"logits/rejected": -39.80550003051758,
"logps/chosen": -185.4181365966797,
"logps/rejected": -171.8718719482422,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.2081618309020996,
"rewards/margins": 14.463720321655273,
"rewards/rejected": -11.255556106567383,
"step": 1850
},
{
"epoch": 2.5905292479108635,
"grad_norm": 0.0006862750160507858,
"learning_rate": 6.9405756731662025e-06,
"logits/chosen": -32.005226135253906,
"logits/rejected": -36.187660217285156,
"logps/chosen": -180.09140014648438,
"logps/rejected": -182.02792358398438,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.8680760860443115,
"rewards/margins": 14.800928115844727,
"rewards/rejected": -11.932851791381836,
"step": 1860
},
{
"epoch": 2.6044568245125346,
"grad_norm": 4.4549200538313016e-05,
"learning_rate": 6.7084493964716805e-06,
"logits/chosen": -35.79204559326172,
"logits/rejected": -38.733375549316406,
"logps/chosen": -189.5426483154297,
"logps/rejected": -189.75588989257812,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.612856388092041,
"rewards/margins": 14.540079116821289,
"rewards/rejected": -11.927224159240723,
"step": 1870
},
{
"epoch": 2.618384401114206,
"grad_norm": 2.0292129192966968e-05,
"learning_rate": 6.4763231197771585e-06,
"logits/chosen": -34.89278030395508,
"logits/rejected": -33.01803970336914,
"logps/chosen": -180.7728729248047,
"logps/rejected": -184.56167602539062,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.2225723266601562,
"rewards/margins": 16.246957778930664,
"rewards/rejected": -13.024386405944824,
"step": 1880
},
{
"epoch": 2.6323119777158777,
"grad_norm": 0.000608162023127079,
"learning_rate": 6.244196843082637e-06,
"logits/chosen": -32.106956481933594,
"logits/rejected": -39.27301788330078,
"logps/chosen": -183.5093994140625,
"logps/rejected": -175.55055236816406,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.5961029529571533,
"rewards/margins": 14.337031364440918,
"rewards/rejected": -10.740928649902344,
"step": 1890
},
{
"epoch": 2.6462395543175488,
"grad_norm": 0.00044016874744556844,
"learning_rate": 6.012070566388115e-06,
"logits/chosen": -36.276512145996094,
"logits/rejected": -39.37938690185547,
"logps/chosen": -195.60324096679688,
"logps/rejected": -177.52719116210938,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.660752773284912,
"rewards/margins": 13.905537605285645,
"rewards/rejected": -11.244784355163574,
"step": 1900
},
{
"epoch": 2.66016713091922,
"grad_norm": 0.0002324298257008195,
"learning_rate": 5.779944289693594e-06,
"logits/chosen": -36.83045196533203,
"logits/rejected": -40.6081428527832,
"logps/chosen": -180.4891815185547,
"logps/rejected": -181.4812469482422,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.993919849395752,
"rewards/margins": 14.951128005981445,
"rewards/rejected": -11.957206726074219,
"step": 1910
},
{
"epoch": 2.6740947075208914,
"grad_norm": 0.0007754198159091175,
"learning_rate": 5.547818012999071e-06,
"logits/chosen": -30.579111099243164,
"logits/rejected": -38.765045166015625,
"logps/chosen": -174.76547241210938,
"logps/rejected": -183.4005126953125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.667250394821167,
"rewards/margins": 13.938199043273926,
"rewards/rejected": -11.27094841003418,
"step": 1920
},
{
"epoch": 2.688022284122563,
"grad_norm": 0.0011436525965109468,
"learning_rate": 5.315691736304549e-06,
"logits/chosen": -34.016395568847656,
"logits/rejected": -36.03986740112305,
"logps/chosen": -186.30514526367188,
"logps/rejected": -179.84713745117188,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.890596389770508,
"rewards/margins": 15.47045612335205,
"rewards/rejected": -12.579858779907227,
"step": 1930
},
{
"epoch": 2.701949860724234,
"grad_norm": 0.0023635756224393845,
"learning_rate": 5.083565459610028e-06,
"logits/chosen": -34.19483184814453,
"logits/rejected": -40.62418746948242,
"logps/chosen": -167.63302612304688,
"logps/rejected": -179.10595703125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.8706557750701904,
"rewards/margins": 14.436419486999512,
"rewards/rejected": -11.565765380859375,
"step": 1940
},
{
"epoch": 2.715877437325905,
"grad_norm": 0.0005265133222565055,
"learning_rate": 4.851439182915506e-06,
"logits/chosen": -35.469627380371094,
"logits/rejected": -47.087947845458984,
"logps/chosen": -179.50204467773438,
"logps/rejected": -179.5740966796875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.908020257949829,
"rewards/margins": 14.18663215637207,
"rewards/rejected": -11.278613090515137,
"step": 1950
},
{
"epoch": 2.7298050139275767,
"grad_norm": 0.0008786062244325876,
"learning_rate": 4.619312906220984e-06,
"logits/chosen": -32.846824645996094,
"logits/rejected": -38.901092529296875,
"logps/chosen": -186.23672485351562,
"logps/rejected": -179.1976776123047,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.0682077407836914,
"rewards/margins": 14.65526008605957,
"rewards/rejected": -11.587051391601562,
"step": 1960
},
{
"epoch": 2.743732590529248,
"grad_norm": 0.0003964914649259299,
"learning_rate": 4.387186629526462e-06,
"logits/chosen": -33.28449249267578,
"logits/rejected": -37.38798141479492,
"logps/chosen": -170.21182250976562,
"logps/rejected": -183.37669372558594,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.0049500465393066,
"rewards/margins": 14.757115364074707,
"rewards/rejected": -11.752164840698242,
"step": 1970
},
{
"epoch": 2.7576601671309193,
"grad_norm": 9.640253119869158e-05,
"learning_rate": 4.155060352831941e-06,
"logits/chosen": -34.633426666259766,
"logits/rejected": -40.41498565673828,
"logps/chosen": -187.4309539794922,
"logps/rejected": -183.026123046875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.992767095565796,
"rewards/margins": 15.027621269226074,
"rewards/rejected": -12.034854888916016,
"step": 1980
},
{
"epoch": 2.7715877437325904,
"grad_norm": 0.003279632655903697,
"learning_rate": 3.922934076137419e-06,
"logits/chosen": -33.140899658203125,
"logits/rejected": -33.699161529541016,
"logps/chosen": -182.78408813476562,
"logps/rejected": -178.6408233642578,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.0828540325164795,
"rewards/margins": 14.902987480163574,
"rewards/rejected": -11.8201322555542,
"step": 1990
},
{
"epoch": 2.785515320334262,
"grad_norm": 0.00014628810458816588,
"learning_rate": 3.690807799442897e-06,
"logits/chosen": -32.4156379699707,
"logits/rejected": -35.6164665222168,
"logps/chosen": -176.88565063476562,
"logps/rejected": -180.22218322753906,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.3253417015075684,
"rewards/margins": 15.263575553894043,
"rewards/rejected": -11.938233375549316,
"step": 2000
},
{
"epoch": 2.799442896935933,
"grad_norm": 0.00014062832633499056,
"learning_rate": 3.4586815227483758e-06,
"logits/chosen": -32.96575164794922,
"logits/rejected": -38.11034393310547,
"logps/chosen": -189.6385040283203,
"logps/rejected": -182.67161560058594,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.1147329807281494,
"rewards/margins": 14.853757858276367,
"rewards/rejected": -11.73902702331543,
"step": 2010
},
{
"epoch": 2.8133704735376046,
"grad_norm": 0.0013761012814939022,
"learning_rate": 3.2265552460538537e-06,
"logits/chosen": -30.57448387145996,
"logits/rejected": -31.974700927734375,
"logps/chosen": -173.05075073242188,
"logps/rejected": -176.5461883544922,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.327730655670166,
"rewards/margins": 14.546607971191406,
"rewards/rejected": -11.218875885009766,
"step": 2020
},
{
"epoch": 2.8272980501392757,
"grad_norm": 0.0010256225941702724,
"learning_rate": 2.9944289693593313e-06,
"logits/chosen": -32.459144592285156,
"logits/rejected": -33.54745101928711,
"logps/chosen": -189.91656494140625,
"logps/rejected": -184.4038848876953,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.7565293312072754,
"rewards/margins": 14.743266105651855,
"rewards/rejected": -11.986737251281738,
"step": 2030
},
{
"epoch": 2.841225626740947,
"grad_norm": 0.0001987310970434919,
"learning_rate": 2.7623026926648097e-06,
"logits/chosen": -34.515052795410156,
"logits/rejected": -40.575809478759766,
"logps/chosen": -188.13137817382812,
"logps/rejected": -173.988525390625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.147059917449951,
"rewards/margins": 13.975947380065918,
"rewards/rejected": -10.828886985778809,
"step": 2040
},
{
"epoch": 2.8551532033426184,
"grad_norm": 0.0019440051401033998,
"learning_rate": 2.5301764159702877e-06,
"logits/chosen": -31.311901092529297,
"logits/rejected": -43.11977005004883,
"logps/chosen": -176.417236328125,
"logps/rejected": -179.4446258544922,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.9070076942443848,
"rewards/margins": 14.395605087280273,
"rewards/rejected": -11.48859691619873,
"step": 2050
},
{
"epoch": 2.86908077994429,
"grad_norm": 0.0007369028753601015,
"learning_rate": 2.298050139275766e-06,
"logits/chosen": -35.70469665527344,
"logits/rejected": -39.731964111328125,
"logps/chosen": -198.8573760986328,
"logps/rejected": -193.2760009765625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.670799732208252,
"rewards/margins": 15.742904663085938,
"rewards/rejected": -13.072105407714844,
"step": 2060
},
{
"epoch": 2.883008356545961,
"grad_norm": 0.0003779043036047369,
"learning_rate": 2.0659238625812445e-06,
"logits/chosen": -31.949493408203125,
"logits/rejected": -31.089712142944336,
"logps/chosen": -191.99917602539062,
"logps/rejected": -180.83590698242188,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.66987943649292,
"rewards/margins": 14.582662582397461,
"rewards/rejected": -11.912784576416016,
"step": 2070
},
{
"epoch": 2.896935933147632,
"grad_norm": 0.00013451039558276534,
"learning_rate": 1.8337975858867223e-06,
"logits/chosen": -33.855201721191406,
"logits/rejected": -40.16669464111328,
"logps/chosen": -194.4259490966797,
"logps/rejected": -189.32534790039062,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.899250030517578,
"rewards/margins": 14.937406539916992,
"rewards/rejected": -12.038156509399414,
"step": 2080
},
{
"epoch": 2.9108635097493036,
"grad_norm": 0.0010667495662346482,
"learning_rate": 1.6016713091922007e-06,
"logits/chosen": -32.62724304199219,
"logits/rejected": -32.57086944580078,
"logps/chosen": -188.95858764648438,
"logps/rejected": -182.73516845703125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.1314823627471924,
"rewards/margins": 15.311136245727539,
"rewards/rejected": -12.179654121398926,
"step": 2090
},
{
"epoch": 2.924791086350975,
"grad_norm": 2.006152135436423e-05,
"learning_rate": 1.369545032497679e-06,
"logits/chosen": -32.503414154052734,
"logits/rejected": -39.49541091918945,
"logps/chosen": -190.20492553710938,
"logps/rejected": -180.6317901611328,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.7702364921569824,
"rewards/margins": 14.473835945129395,
"rewards/rejected": -11.70359992980957,
"step": 2100
},
{
"epoch": 2.9387186629526463,
"grad_norm": 0.00034252344630658627,
"learning_rate": 1.1374187558031571e-06,
"logits/chosen": -35.25168228149414,
"logits/rejected": -38.91762161254883,
"logps/chosen": -175.8085479736328,
"logps/rejected": -179.20114135742188,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.377187728881836,
"rewards/margins": 14.512290954589844,
"rewards/rejected": -11.135104179382324,
"step": 2110
},
{
"epoch": 2.9526462395543174,
"grad_norm": 0.00010172268230235204,
"learning_rate": 9.052924791086352e-07,
"logits/chosen": -33.312416076660156,
"logits/rejected": -40.598228454589844,
"logps/chosen": -190.16940307617188,
"logps/rejected": -175.98265075683594,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.345635414123535,
"rewards/margins": 13.13763427734375,
"rewards/rejected": -10.791997909545898,
"step": 2120
},
{
"epoch": 2.966573816155989,
"grad_norm": 0.00028497324092313647,
"learning_rate": 6.731662024141133e-07,
"logits/chosen": -33.10255813598633,
"logits/rejected": -37.320838928222656,
"logps/chosen": -194.23452758789062,
"logps/rejected": -182.88694763183594,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.9284331798553467,
"rewards/margins": 14.861193656921387,
"rewards/rejected": -11.932759284973145,
"step": 2130
},
{
"epoch": 2.98050139275766,
"grad_norm": 6.592216959688812e-05,
"learning_rate": 4.4103992571959147e-07,
"logits/chosen": -33.3602180480957,
"logits/rejected": -31.276813507080078,
"logps/chosen": -202.84475708007812,
"logps/rejected": -188.7292938232422,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.3148932456970215,
"rewards/margins": 14.879697799682617,
"rewards/rejected": -12.564805030822754,
"step": 2140
},
{
"epoch": 2.9944289693593316,
"grad_norm": 0.0003906878991983831,
"learning_rate": 2.0891364902506967e-07,
"logits/chosen": -32.936134338378906,
"logits/rejected": -35.92009735107422,
"logps/chosen": -193.31060791015625,
"logps/rejected": -176.9427490234375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.182100772857666,
"rewards/margins": 14.334200859069824,
"rewards/rejected": -11.152099609375,
"step": 2150
}
],
"logging_steps": 10,
"max_steps": 2154,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}