output_iter3_ckpt / trainer_state.json
DavieLion's picture
Upload 11 files
b3897b2 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 6.0,
"eval_steps": 100,
"global_step": 1650,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0036363636363636364,
"grad_norm": 161867.6203244379,
"learning_rate": 6.06060606060606e-10,
"logits/generated": 3.250883102416992,
"logits/real": 3.3663041591644287,
"logps/generated": -1246.279052734375,
"logps/real": -577.5853881835938,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/generated": 0.0,
"rewards/margins": 0.0,
"rewards/real": 0.0,
"step": 1
},
{
"epoch": 0.03636363636363636,
"grad_norm": 65759.84739536181,
"learning_rate": 6.060606060606061e-09,
"logits/generated": 3.811872720718384,
"logits/real": 3.2956764698028564,
"logps/generated": -1554.837158203125,
"logps/real": -466.0841064453125,
"loss": 7.4107,
"rewards/accuracies": 0.5416666865348816,
"rewards/generated": -13.777423858642578,
"rewards/margins": 11.481453895568848,
"rewards/real": -2.295969247817993,
"step": 10
},
{
"epoch": 0.07272727272727272,
"grad_norm": 158533.00394960475,
"learning_rate": 1.2121212121212122e-08,
"logits/generated": 3.680910587310791,
"logits/real": 3.807919979095459,
"logps/generated": -1449.9287109375,
"logps/real": -537.6838989257812,
"loss": 8.3198,
"rewards/accuracies": 0.612500011920929,
"rewards/generated": -14.788434982299805,
"rewards/margins": 12.738945007324219,
"rewards/real": -2.0494906902313232,
"step": 20
},
{
"epoch": 0.10909090909090909,
"grad_norm": 44559.67859277519,
"learning_rate": 1.818181818181818e-08,
"logits/generated": 3.533531904220581,
"logits/real": 4.045346736907959,
"logps/generated": -1542.246337890625,
"logps/real": -596.746337890625,
"loss": 5.2618,
"rewards/accuracies": 0.7124999761581421,
"rewards/generated": -28.993526458740234,
"rewards/margins": 23.528636932373047,
"rewards/real": -5.464890956878662,
"step": 30
},
{
"epoch": 0.14545454545454545,
"grad_norm": 24311.04160747661,
"learning_rate": 2.4242424242424243e-08,
"logits/generated": 3.2833874225616455,
"logits/real": 3.8217270374298096,
"logps/generated": -1298.0233154296875,
"logps/real": -616.4668579101562,
"loss": 2.1816,
"rewards/accuracies": 0.875,
"rewards/generated": -46.675140380859375,
"rewards/margins": 37.830352783203125,
"rewards/real": -8.844793319702148,
"step": 40
},
{
"epoch": 0.18181818181818182,
"grad_norm": 9511.781352491278,
"learning_rate": 3.0303030303030305e-08,
"logits/generated": 3.39762544631958,
"logits/real": 4.15239143371582,
"logps/generated": -1378.459228515625,
"logps/real": -505.8817443847656,
"loss": 0.5522,
"rewards/accuracies": 0.925000011920929,
"rewards/generated": -107.13529205322266,
"rewards/margins": 85.6633529663086,
"rewards/real": -21.471942901611328,
"step": 50
},
{
"epoch": 0.21818181818181817,
"grad_norm": 7819.324676656772,
"learning_rate": 3.636363636363636e-08,
"logits/generated": 3.292616605758667,
"logits/real": 3.9783637523651123,
"logps/generated": -1550.360107421875,
"logps/real": -536.5904541015625,
"loss": 0.4931,
"rewards/accuracies": 0.9624999761581421,
"rewards/generated": -114.218017578125,
"rewards/margins": 97.15935516357422,
"rewards/real": -17.058650970458984,
"step": 60
},
{
"epoch": 0.2545454545454545,
"grad_norm": 5304.581511521464,
"learning_rate": 4.242424242424242e-08,
"logits/generated": 3.7238917350769043,
"logits/real": 3.9821434020996094,
"logps/generated": -1807.1488037109375,
"logps/real": -562.0470581054688,
"loss": 0.4211,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -135.7723388671875,
"rewards/margins": 114.70655822753906,
"rewards/real": -21.065773010253906,
"step": 70
},
{
"epoch": 0.2909090909090909,
"grad_norm": 1090.3810930155685,
"learning_rate": 4.8484848484848486e-08,
"logits/generated": 4.115612506866455,
"logits/real": 3.496765613555908,
"logps/generated": -1947.6031494140625,
"logps/real": -505.0247497558594,
"loss": 0.2413,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -153.8606719970703,
"rewards/margins": 132.05270385742188,
"rewards/real": -21.80796241760254,
"step": 80
},
{
"epoch": 0.32727272727272727,
"grad_norm": 0.0028666690842750952,
"learning_rate": 5.454545454545454e-08,
"logits/generated": 3.084749937057495,
"logits/real": 3.830709457397461,
"logps/generated": -1386.0224609375,
"logps/real": -544.002685546875,
"loss": 0.0918,
"rewards/accuracies": 1.0,
"rewards/generated": -187.54513549804688,
"rewards/margins": 168.12576293945312,
"rewards/real": -19.419374465942383,
"step": 90
},
{
"epoch": 0.36363636363636365,
"grad_norm": 2548.034315563765,
"learning_rate": 6.060606060606061e-08,
"logits/generated": 3.542879581451416,
"logits/real": 3.966137647628784,
"logps/generated": -1731.931884765625,
"logps/real": -544.5482788085938,
"loss": 0.0298,
"rewards/accuracies": 1.0,
"rewards/generated": -183.2211151123047,
"rewards/margins": 162.5536346435547,
"rewards/real": -20.6674747467041,
"step": 100
},
{
"epoch": 0.4,
"grad_norm": 9125.456952119537,
"learning_rate": 6.666666666666665e-08,
"logits/generated": 3.551018476486206,
"logits/real": 4.005837440490723,
"logps/generated": -1565.452880859375,
"logps/real": -569.3267822265625,
"loss": 0.1962,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -193.08192443847656,
"rewards/margins": 174.71852111816406,
"rewards/real": -18.363407135009766,
"step": 110
},
{
"epoch": 0.43636363636363634,
"grad_norm": 43.62923605034956,
"learning_rate": 7.272727272727273e-08,
"logits/generated": 4.0249834060668945,
"logits/real": 4.246652603149414,
"logps/generated": -1703.650634765625,
"logps/real": -472.86328125,
"loss": 0.1856,
"rewards/accuracies": 1.0,
"rewards/generated": -194.37936401367188,
"rewards/margins": 174.404052734375,
"rewards/real": -19.97530174255371,
"step": 120
},
{
"epoch": 0.4727272727272727,
"grad_norm": 1.3324222147148856e-07,
"learning_rate": 7.878787878787878e-08,
"logits/generated": 3.621720790863037,
"logits/real": 4.0656514167785645,
"logps/generated": -1639.7611083984375,
"logps/real": -544.0973510742188,
"loss": 0.0656,
"rewards/accuracies": 1.0,
"rewards/generated": -217.6341094970703,
"rewards/margins": 197.27743530273438,
"rewards/real": -20.356674194335938,
"step": 130
},
{
"epoch": 0.509090909090909,
"grad_norm": 7274.262763325892,
"learning_rate": 8.484848484848484e-08,
"logits/generated": 3.970414400100708,
"logits/real": 3.919184923171997,
"logps/generated": -1788.6624755859375,
"logps/real": -516.3016357421875,
"loss": 0.1568,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -210.6239471435547,
"rewards/margins": 190.55148315429688,
"rewards/real": -20.072477340698242,
"step": 140
},
{
"epoch": 0.5454545454545454,
"grad_norm": 0.0002880250676340669,
"learning_rate": 9.09090909090909e-08,
"logits/generated": 3.3956267833709717,
"logits/real": 3.999126434326172,
"logps/generated": -1413.697021484375,
"logps/real": -508.56927490234375,
"loss": 0.1265,
"rewards/accuracies": 1.0,
"rewards/generated": -262.8182067871094,
"rewards/margins": 238.43539428710938,
"rewards/real": -24.3828182220459,
"step": 150
},
{
"epoch": 0.5818181818181818,
"grad_norm": 8.356960095641936e-12,
"learning_rate": 9.696969696969697e-08,
"logits/generated": 3.9105753898620605,
"logits/real": 4.2755632400512695,
"logps/generated": -1530.80615234375,
"logps/real": -514.7744140625,
"loss": 0.4278,
"rewards/accuracies": 1.0,
"rewards/generated": -214.5561981201172,
"rewards/margins": 194.1567840576172,
"rewards/real": -20.399410247802734,
"step": 160
},
{
"epoch": 0.6181818181818182,
"grad_norm": 2.1905714551153064e-05,
"learning_rate": 9.966329966329967e-08,
"logits/generated": 4.027784824371338,
"logits/real": 3.9190127849578857,
"logps/generated": -1677.461669921875,
"logps/real": -529.8884887695312,
"loss": 0.0288,
"rewards/accuracies": 1.0,
"rewards/generated": -222.0593719482422,
"rewards/margins": 200.66824340820312,
"rewards/real": -21.39112663269043,
"step": 170
},
{
"epoch": 0.6545454545454545,
"grad_norm": 4554.734421731353,
"learning_rate": 9.898989898989899e-08,
"logits/generated": 3.6708030700683594,
"logits/real": 3.8833823204040527,
"logps/generated": -1624.985595703125,
"logps/real": -475.5914001464844,
"loss": 0.1764,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -234.6440887451172,
"rewards/margins": 217.81668090820312,
"rewards/real": -16.827428817749023,
"step": 180
},
{
"epoch": 0.6909090909090909,
"grad_norm": 795.9220457751134,
"learning_rate": 9.83164983164983e-08,
"logits/generated": 3.776212692260742,
"logits/real": 3.720076084136963,
"logps/generated": -1932.6201171875,
"logps/real": -496.31768798828125,
"loss": 0.0109,
"rewards/accuracies": 1.0,
"rewards/generated": -242.80508422851562,
"rewards/margins": 221.8138427734375,
"rewards/real": -20.991230010986328,
"step": 190
},
{
"epoch": 0.7272727272727273,
"grad_norm": 7.948293162415954e-09,
"learning_rate": 9.764309764309763e-08,
"logits/generated": 3.4761600494384766,
"logits/real": 3.7583975791931152,
"logps/generated": -1729.9085693359375,
"logps/real": -629.2281494140625,
"loss": 0.0175,
"rewards/accuracies": 1.0,
"rewards/generated": -243.0241241455078,
"rewards/margins": 225.57907104492188,
"rewards/real": -17.445043563842773,
"step": 200
},
{
"epoch": 0.7636363636363637,
"grad_norm": 11854.565351399473,
"learning_rate": 9.696969696969697e-08,
"logits/generated": 3.1848533153533936,
"logits/real": 3.9498603343963623,
"logps/generated": -1566.5595703125,
"logps/real": -527.8526611328125,
"loss": 0.0562,
"rewards/accuracies": 1.0,
"rewards/generated": -251.7228546142578,
"rewards/margins": 224.41226196289062,
"rewards/real": -27.310577392578125,
"step": 210
},
{
"epoch": 0.8,
"grad_norm": 1.324414784532279e-09,
"learning_rate": 9.629629629629629e-08,
"logits/generated": 3.6429061889648438,
"logits/real": 4.131524085998535,
"logps/generated": -1664.998291015625,
"logps/real": -429.4391174316406,
"loss": 0.0953,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -265.4717712402344,
"rewards/margins": 244.45150756835938,
"rewards/real": -21.020299911499023,
"step": 220
},
{
"epoch": 0.8363636363636363,
"grad_norm": 2516.386914307099,
"learning_rate": 9.562289562289561e-08,
"logits/generated": 3.362239360809326,
"logits/real": 4.2897210121154785,
"logps/generated": -1774.610107421875,
"logps/real": -527.5950317382812,
"loss": 0.0604,
"rewards/accuracies": 1.0,
"rewards/generated": -281.13580322265625,
"rewards/margins": 258.14569091796875,
"rewards/real": -22.99008560180664,
"step": 230
},
{
"epoch": 0.8727272727272727,
"grad_norm": 4210.019236605774,
"learning_rate": 9.494949494949494e-08,
"logits/generated": 3.58394193649292,
"logits/real": 3.9923107624053955,
"logps/generated": -1617.4654541015625,
"logps/real": -498.33056640625,
"loss": 0.1084,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -241.74960327148438,
"rewards/margins": 225.633056640625,
"rewards/real": -16.116552352905273,
"step": 240
},
{
"epoch": 0.9090909090909091,
"grad_norm": 3.992364816128141e-15,
"learning_rate": 9.427609427609428e-08,
"logits/generated": 3.3853213787078857,
"logits/real": 3.808389186859131,
"logps/generated": -1583.3477783203125,
"logps/real": -526.8497314453125,
"loss": 0.0632,
"rewards/accuracies": 1.0,
"rewards/generated": -252.41494750976562,
"rewards/margins": 236.5117645263672,
"rewards/real": -15.90319538116455,
"step": 250
},
{
"epoch": 0.9454545454545454,
"grad_norm": 0.06379282101245505,
"learning_rate": 9.36026936026936e-08,
"logits/generated": 3.554725170135498,
"logits/real": 3.842238664627075,
"logps/generated": -1643.524658203125,
"logps/real": -493.2911682128906,
"loss": 0.0565,
"rewards/accuracies": 1.0,
"rewards/generated": -256.9104309082031,
"rewards/margins": 242.4754638671875,
"rewards/real": -14.434976577758789,
"step": 260
},
{
"epoch": 0.9818181818181818,
"grad_norm": 2.1316518426288745e-15,
"learning_rate": 9.292929292929292e-08,
"logits/generated": 3.709350109100342,
"logits/real": 3.892772674560547,
"logps/generated": -1499.9554443359375,
"logps/real": -514.9131469726562,
"loss": 0.0007,
"rewards/accuracies": 1.0,
"rewards/generated": -262.5176086425781,
"rewards/margins": 242.5634002685547,
"rewards/real": -19.954198837280273,
"step": 270
},
{
"epoch": 1.018181818181818,
"grad_norm": 2.144913603858407e-16,
"learning_rate": 9.225589225589225e-08,
"logits/generated": 3.527078151702881,
"logits/real": 4.176957130432129,
"logps/generated": -1430.626953125,
"logps/real": -487.687744140625,
"loss": 0.0475,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -263.19024658203125,
"rewards/margins": 248.2545166015625,
"rewards/real": -14.93572998046875,
"step": 280
},
{
"epoch": 1.0545454545454545,
"grad_norm": 9.262988835020684e-16,
"learning_rate": 9.158249158249159e-08,
"logits/generated": 3.8879787921905518,
"logits/real": 3.7888145446777344,
"logps/generated": -1669.9515380859375,
"logps/real": -599.4093627929688,
"loss": 0.022,
"rewards/accuracies": 1.0,
"rewards/generated": -260.335693359375,
"rewards/margins": 241.45071411132812,
"rewards/real": -18.88497543334961,
"step": 290
},
{
"epoch": 1.0909090909090908,
"grad_norm": 1.632491167864082e-15,
"learning_rate": 9.09090909090909e-08,
"logits/generated": 3.4893486499786377,
"logits/real": 4.106193542480469,
"logps/generated": -1454.445068359375,
"logps/real": -541.4019775390625,
"loss": 0.0002,
"rewards/accuracies": 1.0,
"rewards/generated": -303.18951416015625,
"rewards/margins": 274.5284423828125,
"rewards/real": -28.661075592041016,
"step": 300
},
{
"epoch": 1.1272727272727272,
"grad_norm": 1.3391428570009603e-26,
"learning_rate": 9.023569023569023e-08,
"logits/generated": 3.674029588699341,
"logits/real": 4.17787504196167,
"logps/generated": -1690.72265625,
"logps/real": -436.976318359375,
"loss": 0.0069,
"rewards/accuracies": 1.0,
"rewards/generated": -316.36273193359375,
"rewards/margins": 290.0094299316406,
"rewards/real": -26.353296279907227,
"step": 310
},
{
"epoch": 1.1636363636363636,
"grad_norm": 0.00020046209172408023,
"learning_rate": 8.956228956228956e-08,
"logits/generated": 3.833738327026367,
"logits/real": 3.479259967803955,
"logps/generated": -1615.9827880859375,
"logps/real": -502.75750732421875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -280.38818359375,
"rewards/margins": 261.127685546875,
"rewards/real": -19.260507583618164,
"step": 320
},
{
"epoch": 1.2,
"grad_norm": 1.621254944519754e-22,
"learning_rate": 8.888888888888888e-08,
"logits/generated": 3.559558868408203,
"logits/real": 4.057076454162598,
"logps/generated": -1680.08203125,
"logps/real": -491.20831298828125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -297.2218017578125,
"rewards/margins": 271.63812255859375,
"rewards/real": -25.583667755126953,
"step": 330
},
{
"epoch": 1.2363636363636363,
"grad_norm": 1.6991550894734641e-12,
"learning_rate": 8.821548821548821e-08,
"logits/generated": 3.3086647987365723,
"logits/real": 4.217398643493652,
"logps/generated": -1606.6771240234375,
"logps/real": -510.59234619140625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -320.9579772949219,
"rewards/margins": 285.43359375,
"rewards/real": -35.524383544921875,
"step": 340
},
{
"epoch": 1.2727272727272727,
"grad_norm": 2.818660541318421e-27,
"learning_rate": 8.754208754208754e-08,
"logits/generated": 3.3664917945861816,
"logits/real": 4.047441005706787,
"logps/generated": -1408.769775390625,
"logps/real": -509.589111328125,
"loss": 0.1804,
"rewards/accuracies": 1.0,
"rewards/generated": -286.76177978515625,
"rewards/margins": 259.9476013183594,
"rewards/real": -26.814172744750977,
"step": 350
},
{
"epoch": 1.309090909090909,
"grad_norm": 0.0007089825415081091,
"learning_rate": 8.686868686868686e-08,
"logits/generated": 3.5507493019104004,
"logits/real": 3.4634432792663574,
"logps/generated": -1446.5318603515625,
"logps/real": -544.6498413085938,
"loss": 0.1174,
"rewards/accuracies": 0.9750000238418579,
"rewards/generated": -265.806396484375,
"rewards/margins": 238.1927032470703,
"rewards/real": -27.613704681396484,
"step": 360
},
{
"epoch": 1.3454545454545455,
"grad_norm": 1.1305549325804168e-08,
"learning_rate": 8.619528619528619e-08,
"logits/generated": 3.628211259841919,
"logits/real": 3.5758774280548096,
"logps/generated": -1428.930908203125,
"logps/real": -562.1917114257812,
"loss": 0.0002,
"rewards/accuracies": 1.0,
"rewards/generated": -255.12948608398438,
"rewards/margins": 239.59548950195312,
"rewards/real": -15.534004211425781,
"step": 370
},
{
"epoch": 1.3818181818181818,
"grad_norm": 1.0350994829795507e-05,
"learning_rate": 8.552188552188552e-08,
"logits/generated": 3.7924466133117676,
"logits/real": 3.9518685340881348,
"logps/generated": -1539.1009521484375,
"logps/real": -527.0864868164062,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -264.60162353515625,
"rewards/margins": 240.48681640625,
"rewards/real": -24.114810943603516,
"step": 380
},
{
"epoch": 1.4181818181818182,
"grad_norm": 0.36389886787684217,
"learning_rate": 8.484848484848484e-08,
"logits/generated": 3.520686626434326,
"logits/real": 4.119534492492676,
"logps/generated": -1430.095458984375,
"logps/real": -599.5445556640625,
"loss": 0.0589,
"rewards/accuracies": 1.0,
"rewards/generated": -281.20819091796875,
"rewards/margins": 268.37298583984375,
"rewards/real": -12.835179328918457,
"step": 390
},
{
"epoch": 1.4545454545454546,
"grad_norm": 2.8324502759319933e-05,
"learning_rate": 8.417508417508418e-08,
"logits/generated": 3.5009918212890625,
"logits/real": 3.8314614295959473,
"logps/generated": -1441.1917724609375,
"logps/real": -531.7547607421875,
"loss": 0.0416,
"rewards/accuracies": 1.0,
"rewards/generated": -252.5347442626953,
"rewards/margins": 230.76644897460938,
"rewards/real": -21.7683162689209,
"step": 400
},
{
"epoch": 1.490909090909091,
"grad_norm": 5094.3564893468065,
"learning_rate": 8.35016835016835e-08,
"logits/generated": 3.4917893409729004,
"logits/real": 3.65199613571167,
"logps/generated": -1599.940185546875,
"logps/real": -547.7235107421875,
"loss": 0.0251,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -271.635498046875,
"rewards/margins": 244.7548370361328,
"rewards/real": -26.880685806274414,
"step": 410
},
{
"epoch": 1.5272727272727273,
"grad_norm": 6.718966014450532e-14,
"learning_rate": 8.282828282828282e-08,
"logits/generated": 3.1480331420898438,
"logits/real": 4.2137770652771,
"logps/generated": -1394.23095703125,
"logps/real": -533.8201904296875,
"loss": 0.0282,
"rewards/accuracies": 1.0,
"rewards/generated": -256.77789306640625,
"rewards/margins": 245.2924041748047,
"rewards/real": -11.485471725463867,
"step": 420
},
{
"epoch": 1.5636363636363635,
"grad_norm": 1.1949041372307487e-08,
"learning_rate": 8.215488215488215e-08,
"logits/generated": 3.9812686443328857,
"logits/real": 3.8523590564727783,
"logps/generated": -1250.764404296875,
"logps/real": -469.63525390625,
"loss": 0.0252,
"rewards/accuracies": 1.0,
"rewards/generated": -210.96096801757812,
"rewards/margins": 207.98599243164062,
"rewards/real": -2.974966049194336,
"step": 430
},
{
"epoch": 1.6,
"grad_norm": 5.239520709960669e-07,
"learning_rate": 8.148148148148148e-08,
"logits/generated": 3.438265562057495,
"logits/real": 3.952139377593994,
"logps/generated": -1334.8717041015625,
"logps/real": -479.496337890625,
"loss": 0.039,
"rewards/accuracies": 1.0,
"rewards/generated": -214.3938751220703,
"rewards/margins": 214.3829345703125,
"rewards/real": -0.01094741839915514,
"step": 440
},
{
"epoch": 1.6363636363636362,
"grad_norm": 1.3071416231646523e-15,
"learning_rate": 8.08080808080808e-08,
"logits/generated": 3.3865771293640137,
"logits/real": 3.869033098220825,
"logps/generated": -1464.185546875,
"logps/real": -487.26104736328125,
"loss": 0.0099,
"rewards/accuracies": 1.0,
"rewards/generated": -220.9183349609375,
"rewards/margins": 223.3875732421875,
"rewards/real": 2.469271183013916,
"step": 450
},
{
"epoch": 1.6727272727272728,
"grad_norm": 5.809273751552379e-27,
"learning_rate": 8.013468013468013e-08,
"logits/generated": 3.5787856578826904,
"logits/real": 4.100471496582031,
"logps/generated": -1492.1751708984375,
"logps/real": -459.5709533691406,
"loss": 0.0005,
"rewards/accuracies": 1.0,
"rewards/generated": -271.4466247558594,
"rewards/margins": 263.6124572753906,
"rewards/real": -7.834146022796631,
"step": 460
},
{
"epoch": 1.709090909090909,
"grad_norm": 1.099730928036333e-14,
"learning_rate": 7.946127946127946e-08,
"logits/generated": 3.8285858631134033,
"logits/real": 4.042223930358887,
"logps/generated": -1498.905029296875,
"logps/real": -461.4064025878906,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -297.97821044921875,
"rewards/margins": 278.54345703125,
"rewards/real": -19.434696197509766,
"step": 470
},
{
"epoch": 1.7454545454545456,
"grad_norm": 1.0056219855024129e-26,
"learning_rate": 7.878787878787878e-08,
"logits/generated": 3.902775287628174,
"logits/real": 4.0818986892700195,
"logps/generated": -1425.9718017578125,
"logps/real": -498.4969787597656,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -269.1473083496094,
"rewards/margins": 257.8677062988281,
"rewards/real": -11.279605865478516,
"step": 480
},
{
"epoch": 1.7818181818181817,
"grad_norm": 1.4824234977472797e-27,
"learning_rate": 7.811447811447811e-08,
"logits/generated": 3.4052627086639404,
"logits/real": 3.743426561355591,
"logps/generated": -1479.702392578125,
"logps/real": -476.9375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -264.81494140625,
"rewards/margins": 250.8484344482422,
"rewards/real": -13.966524124145508,
"step": 490
},
{
"epoch": 1.8181818181818183,
"grad_norm": 0.0001152440497230488,
"learning_rate": 7.744107744107744e-08,
"logits/generated": 3.205287218093872,
"logits/real": 4.380854606628418,
"logps/generated": -1461.5018310546875,
"logps/real": -577.3456420898438,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -283.0616149902344,
"rewards/margins": 271.5289001464844,
"rewards/real": -11.532726287841797,
"step": 500
},
{
"epoch": 1.8545454545454545,
"grad_norm": 9.961164548824944e-16,
"learning_rate": 7.676767676767677e-08,
"logits/generated": 3.8037960529327393,
"logits/real": 4.577506065368652,
"logps/generated": -1738.124755859375,
"logps/real": -426.62908935546875,
"loss": 0.0211,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -278.2953186035156,
"rewards/margins": 274.4579162597656,
"rewards/real": -3.8374037742614746,
"step": 510
},
{
"epoch": 1.8909090909090909,
"grad_norm": 1.5112555232080375e-25,
"learning_rate": 7.609427609427609e-08,
"logits/generated": 3.249746799468994,
"logits/real": 4.040205955505371,
"logps/generated": -1564.7763671875,
"logps/real": -596.6712646484375,
"loss": 0.0082,
"rewards/accuracies": 1.0,
"rewards/generated": -282.82269287109375,
"rewards/margins": 289.08319091796875,
"rewards/real": 6.260525703430176,
"step": 520
},
{
"epoch": 1.9272727272727272,
"grad_norm": 2.544045781036373e-31,
"learning_rate": 7.542087542087542e-08,
"logits/generated": 3.76676607131958,
"logits/real": 3.9748542308807373,
"logps/generated": -1476.042724609375,
"logps/real": -493.10223388671875,
"loss": 0.0039,
"rewards/accuracies": 1.0,
"rewards/generated": -213.38961791992188,
"rewards/margins": 221.9373779296875,
"rewards/real": 8.547750473022461,
"step": 530
},
{
"epoch": 1.9636363636363636,
"grad_norm": 1.1070566434496538e-14,
"learning_rate": 7.474747474747475e-08,
"logits/generated": 3.9607677459716797,
"logits/real": 4.024069786071777,
"logps/generated": -1877.058837890625,
"logps/real": -491.959228515625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -255.7445068359375,
"rewards/margins": 262.62823486328125,
"rewards/real": 6.883699893951416,
"step": 540
},
{
"epoch": 2.0,
"grad_norm": 961.3469663092699,
"learning_rate": 7.407407407407407e-08,
"logits/generated": 3.349459409713745,
"logits/real": 4.006588935852051,
"logps/generated": -1369.419677734375,
"logps/real": -464.02130126953125,
"loss": 0.0024,
"rewards/accuracies": 1.0,
"rewards/generated": -222.8209991455078,
"rewards/margins": 233.3080291748047,
"rewards/real": 10.487030029296875,
"step": 550
},
{
"epoch": 2.036363636363636,
"grad_norm": 9.057600032535732e-10,
"learning_rate": 7.34006734006734e-08,
"logits/generated": 3.6556448936462402,
"logits/real": 3.9517414569854736,
"logps/generated": -1621.7646484375,
"logps/real": -549.6907348632812,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -277.4305725097656,
"rewards/margins": 283.687255859375,
"rewards/real": 6.2567033767700195,
"step": 560
},
{
"epoch": 2.0727272727272728,
"grad_norm": 7.139382445177019e-20,
"learning_rate": 7.272727272727273e-08,
"logits/generated": 3.4937987327575684,
"logits/real": 4.038242340087891,
"logps/generated": -1512.7313232421875,
"logps/real": -530.453125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -251.084716796875,
"rewards/margins": 252.330810546875,
"rewards/real": 1.2460654973983765,
"step": 570
},
{
"epoch": 2.109090909090909,
"grad_norm": 3.2270331679056034e-19,
"learning_rate": 7.205387205387205e-08,
"logits/generated": 3.7572948932647705,
"logits/real": 4.378727912902832,
"logps/generated": -1412.48388671875,
"logps/real": -465.0306701660156,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -261.629638671875,
"rewards/margins": 273.5119323730469,
"rewards/real": 11.882287979125977,
"step": 580
},
{
"epoch": 2.1454545454545455,
"grad_norm": 4.775871749612275e-12,
"learning_rate": 7.138047138047138e-08,
"logits/generated": 3.9829063415527344,
"logits/real": 4.248471260070801,
"logps/generated": -1897.7740478515625,
"logps/real": -450.75146484375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -273.78240966796875,
"rewards/margins": 273.58770751953125,
"rewards/real": -0.1947340965270996,
"step": 590
},
{
"epoch": 2.1818181818181817,
"grad_norm": 0.43906412049943816,
"learning_rate": 7.070707070707071e-08,
"logits/generated": 3.9363341331481934,
"logits/real": 4.091122150421143,
"logps/generated": -1581.771728515625,
"logps/real": -453.38946533203125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -227.46475219726562,
"rewards/margins": 232.3105926513672,
"rewards/real": 4.845867156982422,
"step": 600
},
{
"epoch": 2.2181818181818183,
"grad_norm": 4.003271106180214e-16,
"learning_rate": 7.003367003367003e-08,
"logits/generated": 3.628199815750122,
"logits/real": 3.911200761795044,
"logps/generated": -1624.291015625,
"logps/real": -534.2811889648438,
"loss": 0.0017,
"rewards/accuracies": 1.0,
"rewards/generated": -305.51153564453125,
"rewards/margins": 298.5929870605469,
"rewards/real": -6.918566703796387,
"step": 610
},
{
"epoch": 2.2545454545454544,
"grad_norm": 4.825557199789743e-16,
"learning_rate": 6.936026936026935e-08,
"logits/generated": 4.049774646759033,
"logits/real": 4.147176265716553,
"logps/generated": -1385.0206298828125,
"logps/real": -495.1424255371094,
"loss": 0.0062,
"rewards/accuracies": 1.0,
"rewards/generated": -254.9941864013672,
"rewards/margins": 243.9451141357422,
"rewards/real": -11.049083709716797,
"step": 620
},
{
"epoch": 2.290909090909091,
"grad_norm": 9.384413995054146e-15,
"learning_rate": 6.868686868686869e-08,
"logits/generated": 3.82362699508667,
"logits/real": 3.9312844276428223,
"logps/generated": -1712.203125,
"logps/real": -509.4066467285156,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -359.2261657714844,
"rewards/margins": 337.8033752441406,
"rewards/real": -21.422758102416992,
"step": 630
},
{
"epoch": 2.327272727272727,
"grad_norm": 1.6226130118471855e-20,
"learning_rate": 6.801346801346801e-08,
"logits/generated": 3.2555956840515137,
"logits/real": 3.4989371299743652,
"logps/generated": -1550.715087890625,
"logps/real": -534.7842407226562,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -362.5604553222656,
"rewards/margins": 341.76409912109375,
"rewards/real": -20.79628562927246,
"step": 640
},
{
"epoch": 2.3636363636363638,
"grad_norm": 2445.668734526786,
"learning_rate": 6.734006734006734e-08,
"logits/generated": 4.016414642333984,
"logits/real": 4.205541133880615,
"logps/generated": -1622.1700439453125,
"logps/real": -519.1929321289062,
"loss": 0.0011,
"rewards/accuracies": 1.0,
"rewards/generated": -359.9587097167969,
"rewards/margins": 332.77911376953125,
"rewards/real": -27.17962646484375,
"step": 650
},
{
"epoch": 2.4,
"grad_norm": 0.0,
"learning_rate": 6.666666666666665e-08,
"logits/generated": 3.48205304145813,
"logits/real": 3.7544798851013184,
"logps/generated": -1831.762451171875,
"logps/real": -557.1226806640625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -348.6358947753906,
"rewards/margins": 326.20648193359375,
"rewards/real": -22.429412841796875,
"step": 660
},
{
"epoch": 2.4363636363636365,
"grad_norm": 1.5853853765821598e-23,
"learning_rate": 6.5993265993266e-08,
"logits/generated": 3.557918071746826,
"logits/real": 3.428406238555908,
"logps/generated": -1539.081298828125,
"logps/real": -518.9280395507812,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -370.35260009765625,
"rewards/margins": 344.65423583984375,
"rewards/real": -25.69829750061035,
"step": 670
},
{
"epoch": 2.4727272727272727,
"grad_norm": 3.430128294202441e-08,
"learning_rate": 6.531986531986532e-08,
"logits/generated": 3.484684467315674,
"logits/real": 4.016026496887207,
"logps/generated": -1661.240478515625,
"logps/real": -544.1554565429688,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -373.469970703125,
"rewards/margins": 352.1344909667969,
"rewards/real": -21.33551597595215,
"step": 680
},
{
"epoch": 2.509090909090909,
"grad_norm": 0.0,
"learning_rate": 6.464646464646465e-08,
"logits/generated": 3.8870773315429688,
"logits/real": 4.1783223152160645,
"logps/generated": -1835.35546875,
"logps/real": -525.7906494140625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -370.15228271484375,
"rewards/margins": 348.29742431640625,
"rewards/real": -21.854822158813477,
"step": 690
},
{
"epoch": 2.5454545454545454,
"grad_norm": 1.485520291302884e-17,
"learning_rate": 6.397306397306396e-08,
"logits/generated": 3.5036914348602295,
"logits/real": 3.7754790782928467,
"logps/generated": -1453.15087890625,
"logps/real": -552.2473754882812,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -319.9031982421875,
"rewards/margins": 297.2294006347656,
"rewards/real": -22.673809051513672,
"step": 700
},
{
"epoch": 2.581818181818182,
"grad_norm": 9925.761806176399,
"learning_rate": 6.32996632996633e-08,
"logits/generated": 3.4248390197753906,
"logits/real": 4.034340858459473,
"logps/generated": -1423.414306640625,
"logps/real": -578.4390258789062,
"loss": 0.0493,
"rewards/accuracies": 0.987500011920929,
"rewards/generated": -330.52459716796875,
"rewards/margins": 309.222412109375,
"rewards/real": -21.302127838134766,
"step": 710
},
{
"epoch": 2.618181818181818,
"grad_norm": 5.679867918386259e-29,
"learning_rate": 6.262626262626263e-08,
"logits/generated": 3.6940674781799316,
"logits/real": 4.145870685577393,
"logps/generated": -1283.764404296875,
"logps/real": -489.2547302246094,
"loss": 0.0185,
"rewards/accuracies": 1.0,
"rewards/generated": -298.9286193847656,
"rewards/margins": 290.1479797363281,
"rewards/real": -8.78062915802002,
"step": 720
},
{
"epoch": 2.6545454545454543,
"grad_norm": 7.526005825335279e-20,
"learning_rate": 6.195286195286194e-08,
"logits/generated": 3.155702590942383,
"logits/real": 3.7637996673583984,
"logps/generated": -1467.9083251953125,
"logps/real": -546.5178833007812,
"loss": 0.0168,
"rewards/accuracies": 1.0,
"rewards/generated": -369.35992431640625,
"rewards/margins": 345.2546081542969,
"rewards/real": -24.10533332824707,
"step": 730
},
{
"epoch": 2.690909090909091,
"grad_norm": 8.57224717026253e-28,
"learning_rate": 6.127946127946127e-08,
"logits/generated": 3.4195969104766846,
"logits/real": 3.9820468425750732,
"logps/generated": -1553.6126708984375,
"logps/real": -564.40087890625,
"loss": 0.0008,
"rewards/accuracies": 1.0,
"rewards/generated": -379.32421875,
"rewards/margins": 362.53631591796875,
"rewards/real": -16.787899017333984,
"step": 740
},
{
"epoch": 2.7272727272727275,
"grad_norm": 2.1648519421116784e-24,
"learning_rate": 6.060606060606061e-08,
"logits/generated": 3.6250579357147217,
"logits/real": 4.063885688781738,
"logps/generated": -1501.2998046875,
"logps/real": -615.9197998046875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -342.18902587890625,
"rewards/margins": 327.02435302734375,
"rewards/real": -15.164652824401855,
"step": 750
},
{
"epoch": 2.7636363636363637,
"grad_norm": 1.4069459552366719e-24,
"learning_rate": 5.993265993265994e-08,
"logits/generated": 3.341201066970825,
"logits/real": 3.9008383750915527,
"logps/generated": -1905.3343505859375,
"logps/real": -511.04498291015625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -409.7513732910156,
"rewards/margins": 391.1456604003906,
"rewards/real": -18.605722427368164,
"step": 760
},
{
"epoch": 2.8,
"grad_norm": 3.0242780956024674e-07,
"learning_rate": 5.925925925925925e-08,
"logits/generated": 4.076157569885254,
"logits/real": 3.7209954261779785,
"logps/generated": -1816.0787353515625,
"logps/real": -485.0223693847656,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -351.87799072265625,
"rewards/margins": 337.22857666015625,
"rewards/real": -14.649354934692383,
"step": 770
},
{
"epoch": 2.8363636363636364,
"grad_norm": 1.2592245187280255e-18,
"learning_rate": 5.8585858585858584e-08,
"logits/generated": 3.7142958641052246,
"logits/real": 4.193110466003418,
"logps/generated": -1846.4498291015625,
"logps/real": -518.27392578125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -395.49456787109375,
"rewards/margins": 373.5671081542969,
"rewards/real": -21.927494049072266,
"step": 780
},
{
"epoch": 2.8727272727272726,
"grad_norm": 1.5298202167064291e-19,
"learning_rate": 5.791245791245791e-08,
"logits/generated": 3.505951404571533,
"logits/real": 4.056126117706299,
"logps/generated": -1407.225341796875,
"logps/real": -542.5260620117188,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -360.354248046875,
"rewards/margins": 346.49407958984375,
"rewards/real": -13.860153198242188,
"step": 790
},
{
"epoch": 2.909090909090909,
"grad_norm": 7.66403093195095e-35,
"learning_rate": 5.723905723905723e-08,
"logits/generated": 3.39953875541687,
"logits/real": 3.984081268310547,
"logps/generated": -1458.378173828125,
"logps/real": -513.1370849609375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -357.29925537109375,
"rewards/margins": 334.2768249511719,
"rewards/real": -23.022445678710938,
"step": 800
},
{
"epoch": 2.9454545454545453,
"grad_norm": 2.1123843398831784e-30,
"learning_rate": 5.6565656565656564e-08,
"logits/generated": 3.5056838989257812,
"logits/real": 3.719235897064209,
"logps/generated": -1713.8677978515625,
"logps/real": -522.7982177734375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -361.1312561035156,
"rewards/margins": 344.48333740234375,
"rewards/real": -16.64789390563965,
"step": 810
},
{
"epoch": 2.981818181818182,
"grad_norm": 8.833278672851137e-20,
"learning_rate": 5.589225589225589e-08,
"logits/generated": 3.532017469406128,
"logits/real": 4.0720086097717285,
"logps/generated": -1373.71630859375,
"logps/real": -477.915771484375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -389.9593505859375,
"rewards/margins": 374.3216857910156,
"rewards/real": -15.637689590454102,
"step": 820
},
{
"epoch": 3.018181818181818,
"grad_norm": 1.829241516518958e-07,
"learning_rate": 5.521885521885522e-08,
"logits/generated": 3.942246198654175,
"logits/real": 4.765660285949707,
"logps/generated": -1411.296875,
"logps/real": -509.1822204589844,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -349.03863525390625,
"rewards/margins": 333.57861328125,
"rewards/real": -15.460012435913086,
"step": 830
},
{
"epoch": 3.0545454545454547,
"grad_norm": 7.515486186427604e-28,
"learning_rate": 5.454545454545454e-08,
"logits/generated": 3.83001708984375,
"logits/real": 4.423516273498535,
"logps/generated": -1518.19921875,
"logps/real": -510.94927978515625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -343.0306396484375,
"rewards/margins": 320.9806213378906,
"rewards/real": -22.049989700317383,
"step": 840
},
{
"epoch": 3.090909090909091,
"grad_norm": 1.0019439073528928e-27,
"learning_rate": 5.387205387205387e-08,
"logits/generated": 3.4515597820281982,
"logits/real": 3.6501259803771973,
"logps/generated": -1589.603759765625,
"logps/real": -517.35205078125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -361.349853515625,
"rewards/margins": 343.94830322265625,
"rewards/real": -17.401607513427734,
"step": 850
},
{
"epoch": 3.1272727272727274,
"grad_norm": 5.331701377808202e-19,
"learning_rate": 5.31986531986532e-08,
"logits/generated": 3.70512056350708,
"logits/real": 3.7677884101867676,
"logps/generated": -1549.2159423828125,
"logps/real": -446.04559326171875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -370.1104736328125,
"rewards/margins": 347.8051452636719,
"rewards/real": -22.305322647094727,
"step": 860
},
{
"epoch": 3.1636363636363636,
"grad_norm": 1.678736301504087e-11,
"learning_rate": 5.2525252525252525e-08,
"logits/generated": 4.319781303405762,
"logits/real": 3.8676934242248535,
"logps/generated": -2044.4833984375,
"logps/real": -440.07586669921875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -390.758056640625,
"rewards/margins": 374.1348571777344,
"rewards/real": -16.623199462890625,
"step": 870
},
{
"epoch": 3.2,
"grad_norm": 6.192331870232493e-20,
"learning_rate": 5.1851851851851846e-08,
"logits/generated": 3.774130344390869,
"logits/real": 3.9397501945495605,
"logps/generated": -1745.0888671875,
"logps/real": -427.6468811035156,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -348.3378601074219,
"rewards/margins": 329.65423583984375,
"rewards/real": -18.683679580688477,
"step": 880
},
{
"epoch": 3.2363636363636363,
"grad_norm": 4.132428468024897e-32,
"learning_rate": 5.117845117845118e-08,
"logits/generated": 3.667954921722412,
"logits/real": 3.71317982673645,
"logps/generated": -1767.097412109375,
"logps/real": -496.12274169921875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -374.12109375,
"rewards/margins": 357.9183044433594,
"rewards/real": -16.202762603759766,
"step": 890
},
{
"epoch": 3.2727272727272725,
"grad_norm": 1.6819218744361375e-14,
"learning_rate": 5.0505050505050506e-08,
"logits/generated": 3.7238926887512207,
"logits/real": 4.136239051818848,
"logps/generated": -1723.2008056640625,
"logps/real": -534.2486572265625,
"loss": 0.0403,
"rewards/accuracies": 1.0,
"rewards/generated": -420.83453369140625,
"rewards/margins": 403.14373779296875,
"rewards/real": -17.690826416015625,
"step": 900
},
{
"epoch": 3.309090909090909,
"grad_norm": 1.075818913331155e-28,
"learning_rate": 4.983164983164983e-08,
"logits/generated": 3.950953245162964,
"logits/real": 4.087274074554443,
"logps/generated": -1642.2506103515625,
"logps/real": -428.4207458496094,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -393.48870849609375,
"rewards/margins": 367.9131774902344,
"rewards/real": -25.575532913208008,
"step": 910
},
{
"epoch": 3.3454545454545457,
"grad_norm": 1.8614782975594385e-13,
"learning_rate": 4.915824915824915e-08,
"logits/generated": 3.6677818298339844,
"logits/real": 4.1127095222473145,
"logps/generated": -1344.891845703125,
"logps/real": -514.3883056640625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -368.61151123046875,
"rewards/margins": 346.4088439941406,
"rewards/real": -22.202638626098633,
"step": 920
},
{
"epoch": 3.381818181818182,
"grad_norm": 3.381691941063049e-08,
"learning_rate": 4.8484848484848486e-08,
"logits/generated": 3.5154712200164795,
"logits/real": 3.966923236846924,
"logps/generated": -1584.695068359375,
"logps/real": -515.33984375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -360.78082275390625,
"rewards/margins": 340.842529296875,
"rewards/real": -19.938283920288086,
"step": 930
},
{
"epoch": 3.418181818181818,
"grad_norm": 0.0,
"learning_rate": 4.781144781144781e-08,
"logits/generated": 3.2723331451416016,
"logits/real": 4.094303131103516,
"logps/generated": -1407.6539306640625,
"logps/real": -563.6630859375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -374.88385009765625,
"rewards/margins": 347.38189697265625,
"rewards/real": -27.50186538696289,
"step": 940
},
{
"epoch": 3.4545454545454546,
"grad_norm": 3.9897612503453264e-26,
"learning_rate": 4.713804713804714e-08,
"logits/generated": 3.8031222820281982,
"logits/real": 4.582549095153809,
"logps/generated": -1604.9886474609375,
"logps/real": -469.01214599609375,
"loss": 0.0004,
"rewards/accuracies": 1.0,
"rewards/generated": -366.40118408203125,
"rewards/margins": 351.3585510253906,
"rewards/real": -15.042654037475586,
"step": 950
},
{
"epoch": 3.4909090909090907,
"grad_norm": 1.1807088805922945e-20,
"learning_rate": 4.646464646464646e-08,
"logits/generated": 3.4880664348602295,
"logits/real": 3.898571014404297,
"logps/generated": -1486.0142822265625,
"logps/real": -506.51531982421875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -352.26165771484375,
"rewards/margins": 333.8659973144531,
"rewards/real": -18.395660400390625,
"step": 960
},
{
"epoch": 3.5272727272727273,
"grad_norm": 2.4268936271701313e-18,
"learning_rate": 4.5791245791245794e-08,
"logits/generated": 3.4788355827331543,
"logits/real": 3.822808027267456,
"logps/generated": -1364.0,
"logps/real": -505.9129333496094,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -365.39385986328125,
"rewards/margins": 347.33087158203125,
"rewards/real": -18.06302833557129,
"step": 970
},
{
"epoch": 3.5636363636363635,
"grad_norm": 0.0,
"learning_rate": 4.5117845117845114e-08,
"logits/generated": 4.036175727844238,
"logits/real": 3.6656670570373535,
"logps/generated": -1476.5787353515625,
"logps/real": -500.7046813964844,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -350.3294982910156,
"rewards/margins": 334.7972412109375,
"rewards/real": -15.532255172729492,
"step": 980
},
{
"epoch": 3.6,
"grad_norm": 3.456223568001992e-30,
"learning_rate": 4.444444444444444e-08,
"logits/generated": 3.490968704223633,
"logits/real": 3.8967537879943848,
"logps/generated": -1640.80859375,
"logps/real": -535.5618896484375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -407.5831298828125,
"rewards/margins": 386.76226806640625,
"rewards/real": -20.820871353149414,
"step": 990
},
{
"epoch": 3.6363636363636362,
"grad_norm": 2.4055459783409575e-31,
"learning_rate": 4.377104377104377e-08,
"logits/generated": 3.8550515174865723,
"logits/real": 3.984800338745117,
"logps/generated": -1491.381591796875,
"logps/real": -536.5324096679688,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -340.267822265625,
"rewards/margins": 324.9073791503906,
"rewards/real": -15.3604736328125,
"step": 1000
},
{
"epoch": 3.672727272727273,
"grad_norm": 8.548157838153945e-25,
"learning_rate": 4.3097643097643095e-08,
"logits/generated": 3.985334873199463,
"logits/real": 4.183938026428223,
"logps/generated": -2043.4547119140625,
"logps/real": -523.0987548828125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -398.6493225097656,
"rewards/margins": 380.72906494140625,
"rewards/real": -17.920269012451172,
"step": 1010
},
{
"epoch": 3.709090909090909,
"grad_norm": 3.0175261180501287e-07,
"learning_rate": 4.242424242424242e-08,
"logits/generated": 3.4043655395507812,
"logits/real": 4.124855041503906,
"logps/generated": -1469.7431640625,
"logps/real": -555.0632934570312,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -387.6515808105469,
"rewards/margins": 371.17645263671875,
"rewards/real": -16.475154876708984,
"step": 1020
},
{
"epoch": 3.7454545454545456,
"grad_norm": 0.0,
"learning_rate": 4.175084175084175e-08,
"logits/generated": 3.7843337059020996,
"logits/real": 4.101785182952881,
"logps/generated": -1506.9027099609375,
"logps/real": -522.604736328125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -355.43914794921875,
"rewards/margins": 334.7210693359375,
"rewards/real": -20.71808624267578,
"step": 1030
},
{
"epoch": 3.7818181818181817,
"grad_norm": 2.4111652226040675e-26,
"learning_rate": 4.1077441077441075e-08,
"logits/generated": 3.670746326446533,
"logits/real": 3.747389554977417,
"logps/generated": -1680.3013916015625,
"logps/real": -483.5108947753906,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -358.0954895019531,
"rewards/margins": 347.24761962890625,
"rewards/real": -10.84788703918457,
"step": 1040
},
{
"epoch": 3.8181818181818183,
"grad_norm": 1.2074440559893782e-13,
"learning_rate": 4.04040404040404e-08,
"logits/generated": 3.5393459796905518,
"logits/real": 4.060755729675293,
"logps/generated": -1671.9293212890625,
"logps/real": -480.4324645996094,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -372.8212585449219,
"rewards/margins": 355.241455078125,
"rewards/real": -17.579803466796875,
"step": 1050
},
{
"epoch": 3.8545454545454545,
"grad_norm": 2.0246114885936573e-34,
"learning_rate": 3.973063973063973e-08,
"logits/generated": 4.0824480056762695,
"logits/real": 3.579103946685791,
"logps/generated": -1707.8812255859375,
"logps/real": -445.0042419433594,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -417.997802734375,
"rewards/margins": 405.29876708984375,
"rewards/real": -12.699002265930176,
"step": 1060
},
{
"epoch": 3.8909090909090907,
"grad_norm": 3.210144860679204e-17,
"learning_rate": 3.9057239057239056e-08,
"logits/generated": 3.7562339305877686,
"logits/real": 4.343286037445068,
"logps/generated": -1494.666015625,
"logps/real": -505.9931640625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -367.2537536621094,
"rewards/margins": 357.6014099121094,
"rewards/real": -9.65237808227539,
"step": 1070
},
{
"epoch": 3.9272727272727272,
"grad_norm": 2.9805943462413785e-15,
"learning_rate": 3.838383838383838e-08,
"logits/generated": 3.5484557151794434,
"logits/real": 4.100916862487793,
"logps/generated": -1519.9788818359375,
"logps/real": -524.9559936523438,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -342.6521911621094,
"rewards/margins": 327.91302490234375,
"rewards/real": -14.739137649536133,
"step": 1080
},
{
"epoch": 3.963636363636364,
"grad_norm": 0.0,
"learning_rate": 3.771043771043771e-08,
"logits/generated": 3.668537139892578,
"logits/real": 3.8592376708984375,
"logps/generated": -1508.808349609375,
"logps/real": -527.437744140625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -353.4441833496094,
"rewards/margins": 339.4230651855469,
"rewards/real": -14.0211181640625,
"step": 1090
},
{
"epoch": 4.0,
"grad_norm": 0.5006449032805113,
"learning_rate": 3.7037037037037036e-08,
"logits/generated": 3.732149600982666,
"logits/real": 3.8971447944641113,
"logps/generated": -1593.047607421875,
"logps/real": -482.2001037597656,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -367.11297607421875,
"rewards/margins": 348.68048095703125,
"rewards/real": -18.43246078491211,
"step": 1100
},
{
"epoch": 4.036363636363636,
"grad_norm": 2.814433616854974e-27,
"learning_rate": 3.636363636363636e-08,
"logits/generated": 3.4156410694122314,
"logits/real": 4.165556907653809,
"logps/generated": -1596.3184814453125,
"logps/real": -573.5258178710938,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -373.71844482421875,
"rewards/margins": 361.01336669921875,
"rewards/real": -12.705034255981445,
"step": 1110
},
{
"epoch": 4.072727272727272,
"grad_norm": 8.319973607571124e-15,
"learning_rate": 3.569023569023569e-08,
"logits/generated": 3.273974895477295,
"logits/real": 4.1000566482543945,
"logps/generated": -1501.887451171875,
"logps/real": -576.9967041015625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -380.444580078125,
"rewards/margins": 365.85614013671875,
"rewards/real": -14.588480949401855,
"step": 1120
},
{
"epoch": 4.109090909090909,
"grad_norm": 1.0017792103000478e-25,
"learning_rate": 3.501683501683502e-08,
"logits/generated": 3.6438803672790527,
"logits/real": 3.937204360961914,
"logps/generated": -1560.591064453125,
"logps/real": -508.82891845703125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -353.357666015625,
"rewards/margins": 336.72894287109375,
"rewards/real": -16.628740310668945,
"step": 1130
},
{
"epoch": 4.1454545454545455,
"grad_norm": 2.4734257256576104e-16,
"learning_rate": 3.4343434343434344e-08,
"logits/generated": 3.737811326980591,
"logits/real": 4.338569641113281,
"logps/generated": -1994.470947265625,
"logps/real": -508.37786865234375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -393.4563293457031,
"rewards/margins": 373.7255554199219,
"rewards/real": -19.73076820373535,
"step": 1140
},
{
"epoch": 4.181818181818182,
"grad_norm": 4.238864478180666e-30,
"learning_rate": 3.367003367003367e-08,
"logits/generated": 3.8650360107421875,
"logits/real": 4.163536071777344,
"logps/generated": -1378.536376953125,
"logps/real": -512.680419921875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -349.7162780761719,
"rewards/margins": 331.3489685058594,
"rewards/real": -18.367273330688477,
"step": 1150
},
{
"epoch": 4.218181818181818,
"grad_norm": 2.4861501835665235e-19,
"learning_rate": 3.2996632996633e-08,
"logits/generated": 3.3802618980407715,
"logits/real": 4.011307716369629,
"logps/generated": -1465.8193359375,
"logps/real": -574.6527099609375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -379.3877868652344,
"rewards/margins": 363.7222595214844,
"rewards/real": -15.665545463562012,
"step": 1160
},
{
"epoch": 4.254545454545455,
"grad_norm": 6.086185442066954e-31,
"learning_rate": 3.2323232323232324e-08,
"logits/generated": 3.559816360473633,
"logits/real": 4.023754119873047,
"logps/generated": -1803.6634521484375,
"logps/real": -554.5623168945312,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -388.7065734863281,
"rewards/margins": 371.402587890625,
"rewards/real": -17.303974151611328,
"step": 1170
},
{
"epoch": 4.290909090909091,
"grad_norm": 7.871786797978349e-27,
"learning_rate": 3.164983164983165e-08,
"logits/generated": 3.4312922954559326,
"logits/real": 3.8822197914123535,
"logps/generated": -1608.0223388671875,
"logps/real": -601.10302734375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -379.69024658203125,
"rewards/margins": 345.4664611816406,
"rewards/real": -34.223793029785156,
"step": 1180
},
{
"epoch": 4.327272727272727,
"grad_norm": 2.3723518217219765e-22,
"learning_rate": 3.097643097643097e-08,
"logits/generated": 3.6023402214050293,
"logits/real": 4.428885459899902,
"logps/generated": -1807.38671875,
"logps/real": -493.84429931640625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -422.08502197265625,
"rewards/margins": 390.57745361328125,
"rewards/real": -31.507583618164062,
"step": 1190
},
{
"epoch": 4.363636363636363,
"grad_norm": 2.433404900713148e-21,
"learning_rate": 3.0303030303030305e-08,
"logits/generated": 3.514697551727295,
"logits/real": 3.9592909812927246,
"logps/generated": -1493.529541015625,
"logps/real": -601.3435668945312,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -357.14764404296875,
"rewards/margins": 331.7057189941406,
"rewards/real": -25.441919326782227,
"step": 1200
},
{
"epoch": 4.4,
"grad_norm": 0.0,
"learning_rate": 2.9629629629629625e-08,
"logits/generated": 3.5242507457733154,
"logits/real": 3.908127546310425,
"logps/generated": -1402.09716796875,
"logps/real": -534.2159423828125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -397.23370361328125,
"rewards/margins": 370.5049743652344,
"rewards/real": -26.72869873046875,
"step": 1210
},
{
"epoch": 4.4363636363636365,
"grad_norm": 6.204048985218319e-20,
"learning_rate": 2.8956228956228955e-08,
"logits/generated": 3.7267520427703857,
"logits/real": 4.163025379180908,
"logps/generated": -1701.4326171875,
"logps/real": -520.2593383789062,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -418.1678771972656,
"rewards/margins": 393.18682861328125,
"rewards/real": -24.981050491333008,
"step": 1220
},
{
"epoch": 4.472727272727273,
"grad_norm": 1.9021979406608815e-23,
"learning_rate": 2.8282828282828282e-08,
"logits/generated": 3.478511095046997,
"logits/real": 4.237907409667969,
"logps/generated": -1657.092041015625,
"logps/real": -518.4788818359375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -415.843505859375,
"rewards/margins": 392.0099792480469,
"rewards/real": -23.83350372314453,
"step": 1230
},
{
"epoch": 4.509090909090909,
"grad_norm": 3.7469204488575616e-32,
"learning_rate": 2.760942760942761e-08,
"logits/generated": 3.6705174446105957,
"logits/real": 3.9910926818847656,
"logps/generated": -1534.4541015625,
"logps/real": -463.1044006347656,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -420.45538330078125,
"rewards/margins": 390.87957763671875,
"rewards/real": -29.57583236694336,
"step": 1240
},
{
"epoch": 4.545454545454545,
"grad_norm": 0.0,
"learning_rate": 2.6936026936026936e-08,
"logits/generated": 3.7410473823547363,
"logits/real": 4.114706993103027,
"logps/generated": -1494.1517333984375,
"logps/real": -452.7135314941406,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -405.7174377441406,
"rewards/margins": 383.12359619140625,
"rewards/real": -22.593822479248047,
"step": 1250
},
{
"epoch": 4.581818181818182,
"grad_norm": 1.4884899843309194e-06,
"learning_rate": 2.6262626262626263e-08,
"logits/generated": 3.5744926929473877,
"logits/real": 4.0339460372924805,
"logps/generated": -1389.479248046875,
"logps/real": -527.5264892578125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -397.1904296875,
"rewards/margins": 371.60284423828125,
"rewards/real": -25.587596893310547,
"step": 1260
},
{
"epoch": 4.618181818181818,
"grad_norm": 1.6503647943638648e-14,
"learning_rate": 2.558922558922559e-08,
"logits/generated": 4.005766868591309,
"logits/real": 4.283044338226318,
"logps/generated": -2155.259033203125,
"logps/real": -538.6444091796875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -431.6084899902344,
"rewards/margins": 398.53594970703125,
"rewards/real": -33.07251739501953,
"step": 1270
},
{
"epoch": 4.654545454545454,
"grad_norm": 7.146810977608247e-31,
"learning_rate": 2.4915824915824916e-08,
"logits/generated": 3.9285404682159424,
"logits/real": 3.7168357372283936,
"logps/generated": -1552.205078125,
"logps/real": -487.2206115722656,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -389.40643310546875,
"rewards/margins": 362.6419677734375,
"rewards/real": -26.764474868774414,
"step": 1280
},
{
"epoch": 4.690909090909091,
"grad_norm": 0.0,
"learning_rate": 2.4242424242424243e-08,
"logits/generated": 3.776773452758789,
"logits/real": 4.315047264099121,
"logps/generated": -1338.3245849609375,
"logps/real": -558.236083984375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -364.6140441894531,
"rewards/margins": 339.8752136230469,
"rewards/real": -24.738853454589844,
"step": 1290
},
{
"epoch": 4.7272727272727275,
"grad_norm": 0.0,
"learning_rate": 2.356902356902357e-08,
"logits/generated": 3.5295321941375732,
"logits/real": 4.021418571472168,
"logps/generated": -1478.497314453125,
"logps/real": -499.4085388183594,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -414.16436767578125,
"rewards/margins": 385.1223449707031,
"rewards/real": -29.041961669921875,
"step": 1300
},
{
"epoch": 4.763636363636364,
"grad_norm": 1.6611749401721815e-30,
"learning_rate": 2.2895622895622897e-08,
"logits/generated": 3.9792373180389404,
"logits/real": 4.20035457611084,
"logps/generated": -1674.936767578125,
"logps/real": -557.9586791992188,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -390.08990478515625,
"rewards/margins": 362.26739501953125,
"rewards/real": -27.822498321533203,
"step": 1310
},
{
"epoch": 4.8,
"grad_norm": 1.3498064441084664e-28,
"learning_rate": 2.222222222222222e-08,
"logits/generated": 3.1779017448425293,
"logits/real": 3.6151771545410156,
"logps/generated": -1536.013916015625,
"logps/real": -532.0733642578125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -406.447265625,
"rewards/margins": 378.7431640625,
"rewards/real": -27.704113006591797,
"step": 1320
},
{
"epoch": 4.836363636363636,
"grad_norm": 1.4446934503565444e-19,
"learning_rate": 2.1548821548821547e-08,
"logits/generated": 3.237544536590576,
"logits/real": 3.6873066425323486,
"logps/generated": -1580.73388671875,
"logps/real": -503.3658752441406,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -422.28558349609375,
"rewards/margins": 395.5210876464844,
"rewards/real": -26.764461517333984,
"step": 1330
},
{
"epoch": 4.872727272727273,
"grad_norm": 1.338898110592278e-13,
"learning_rate": 2.0875420875420874e-08,
"logits/generated": 3.466773271560669,
"logits/real": 4.2854814529418945,
"logps/generated": -1519.102294921875,
"logps/real": -484.23529052734375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -394.77484130859375,
"rewards/margins": 370.26416015625,
"rewards/real": -24.510705947875977,
"step": 1340
},
{
"epoch": 4.909090909090909,
"grad_norm": 9.330317502415284e-18,
"learning_rate": 2.02020202020202e-08,
"logits/generated": 3.3465042114257812,
"logits/real": 4.172385215759277,
"logps/generated": -1484.463134765625,
"logps/real": -564.1114501953125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -399.4004211425781,
"rewards/margins": 375.4627380371094,
"rewards/real": -23.937633514404297,
"step": 1350
},
{
"epoch": 4.945454545454545,
"grad_norm": 1.0844770956022308e-29,
"learning_rate": 1.9528619528619528e-08,
"logits/generated": 3.637136936187744,
"logits/real": 3.7470059394836426,
"logps/generated": -1451.9078369140625,
"logps/real": -555.2150268554688,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -409.23699951171875,
"rewards/margins": 382.7684020996094,
"rewards/real": -26.468582153320312,
"step": 1360
},
{
"epoch": 4.9818181818181815,
"grad_norm": 1.8117589004328856e-21,
"learning_rate": 1.8855218855218855e-08,
"logits/generated": 3.419265031814575,
"logits/real": 4.801307678222656,
"logps/generated": -1575.279541015625,
"logps/real": -505.57366943359375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -392.1907653808594,
"rewards/margins": 370.88433837890625,
"rewards/real": -21.30645751953125,
"step": 1370
},
{
"epoch": 5.0181818181818185,
"grad_norm": 1.1855551970740116e-24,
"learning_rate": 1.818181818181818e-08,
"logits/generated": 3.845301389694214,
"logits/real": 4.155182361602783,
"logps/generated": -1829.9830322265625,
"logps/real": -475.1502990722656,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -390.05047607421875,
"rewards/margins": 369.96630859375,
"rewards/real": -20.084190368652344,
"step": 1380
},
{
"epoch": 5.054545454545455,
"grad_norm": 0.0,
"learning_rate": 1.750841750841751e-08,
"logits/generated": 3.4445242881774902,
"logits/real": 3.744450092315674,
"logps/generated": -1426.3824462890625,
"logps/real": -522.4410400390625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -403.8083190917969,
"rewards/margins": 379.8366394042969,
"rewards/real": -23.9716854095459,
"step": 1390
},
{
"epoch": 5.090909090909091,
"grad_norm": 0.0,
"learning_rate": 1.6835016835016835e-08,
"logits/generated": 3.5476207733154297,
"logits/real": 4.142487049102783,
"logps/generated": -1349.896484375,
"logps/real": -569.4417724609375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -349.24212646484375,
"rewards/margins": 328.8203430175781,
"rewards/real": -20.42179298400879,
"step": 1400
},
{
"epoch": 5.127272727272727,
"grad_norm": 0.0,
"learning_rate": 1.6161616161616162e-08,
"logits/generated": 3.408921003341675,
"logits/real": 3.993192195892334,
"logps/generated": -1857.1177978515625,
"logps/real": -529.3366088867188,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -444.29205322265625,
"rewards/margins": 420.785888671875,
"rewards/real": -23.506126403808594,
"step": 1410
},
{
"epoch": 5.163636363636364,
"grad_norm": 4.017349771154928e-20,
"learning_rate": 1.5488215488215486e-08,
"logits/generated": 3.642667770385742,
"logits/real": 3.8170886039733887,
"logps/generated": -1371.3741455078125,
"logps/real": -572.6683349609375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -362.79974365234375,
"rewards/margins": 334.07781982421875,
"rewards/real": -28.721933364868164,
"step": 1420
},
{
"epoch": 5.2,
"grad_norm": 1.3639315235721727e-16,
"learning_rate": 1.4814814814814813e-08,
"logits/generated": 3.6792221069335938,
"logits/real": 4.338517189025879,
"logps/generated": -1340.431884765625,
"logps/real": -553.2203369140625,
"loss": 0.0052,
"rewards/accuracies": 1.0,
"rewards/generated": -373.75408935546875,
"rewards/margins": 357.2299499511719,
"rewards/real": -16.524112701416016,
"step": 1430
},
{
"epoch": 5.236363636363636,
"grad_norm": 3.081123632921718e-23,
"learning_rate": 1.4141414141414141e-08,
"logits/generated": 3.450949192047119,
"logits/real": 3.970660448074341,
"logps/generated": -1683.385986328125,
"logps/real": -535.0615234375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -463.681884765625,
"rewards/margins": 442.11138916015625,
"rewards/real": -21.570537567138672,
"step": 1440
},
{
"epoch": 5.2727272727272725,
"grad_norm": 3.0728626723669876e-21,
"learning_rate": 1.3468013468013468e-08,
"logits/generated": 3.5929648876190186,
"logits/real": 4.298793792724609,
"logps/generated": -1646.7025146484375,
"logps/real": -518.2625122070312,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -394.73199462890625,
"rewards/margins": 378.03753662109375,
"rewards/real": -16.694438934326172,
"step": 1450
},
{
"epoch": 5.309090909090909,
"grad_norm": 4.0070865257592495e-32,
"learning_rate": 1.2794612794612795e-08,
"logits/generated": 3.2639718055725098,
"logits/real": 3.8822433948516846,
"logps/generated": -1574.0999755859375,
"logps/real": -550.0856323242188,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -377.2677917480469,
"rewards/margins": 360.36578369140625,
"rewards/real": -16.902013778686523,
"step": 1460
},
{
"epoch": 5.345454545454546,
"grad_norm": 1.9271475273942652e-15,
"learning_rate": 1.2121212121212122e-08,
"logits/generated": 3.7863450050354004,
"logits/real": 4.053950786590576,
"logps/generated": -1627.314208984375,
"logps/real": -525.3707275390625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -400.48150634765625,
"rewards/margins": 378.9859313964844,
"rewards/real": -21.495540618896484,
"step": 1470
},
{
"epoch": 5.381818181818182,
"grad_norm": 0.0,
"learning_rate": 1.1447811447811448e-08,
"logits/generated": 3.581354856491089,
"logits/real": 3.8952338695526123,
"logps/generated": -1557.240966796875,
"logps/real": -485.9366149902344,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -376.5646057128906,
"rewards/margins": 362.3238220214844,
"rewards/real": -14.240735054016113,
"step": 1480
},
{
"epoch": 5.418181818181818,
"grad_norm": 0.0,
"learning_rate": 1.0774410774410774e-08,
"logits/generated": 3.418860673904419,
"logits/real": 4.168183326721191,
"logps/generated": -1425.8209228515625,
"logps/real": -488.4073181152344,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -397.9587097167969,
"rewards/margins": 378.22540283203125,
"rewards/real": -19.733327865600586,
"step": 1490
},
{
"epoch": 5.454545454545454,
"grad_norm": 0.0,
"learning_rate": 1.01010101010101e-08,
"logits/generated": 3.3258609771728516,
"logits/real": 3.9882960319519043,
"logps/generated": -1420.126708984375,
"logps/real": -562.3682250976562,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -379.91461181640625,
"rewards/margins": 360.4001770019531,
"rewards/real": -19.51443099975586,
"step": 1500
},
{
"epoch": 5.490909090909091,
"grad_norm": 1.2370404728759755e-12,
"learning_rate": 9.427609427609427e-09,
"logits/generated": 3.9953675270080566,
"logits/real": 4.288315773010254,
"logps/generated": -1769.1597900390625,
"logps/real": -541.0712890625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -395.64190673828125,
"rewards/margins": 376.20367431640625,
"rewards/real": -19.43826675415039,
"step": 1510
},
{
"epoch": 5.527272727272727,
"grad_norm": 3.606466171226265e-24,
"learning_rate": 8.754208754208754e-09,
"logits/generated": 3.4135119915008545,
"logits/real": 4.049647808074951,
"logps/generated": -1462.7747802734375,
"logps/real": -554.20849609375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -423.81182861328125,
"rewards/margins": 400.8638610839844,
"rewards/real": -22.94796371459961,
"step": 1520
},
{
"epoch": 5.5636363636363635,
"grad_norm": 6.1808472626225334e-27,
"learning_rate": 8.080808080808081e-09,
"logits/generated": 3.4834237098693848,
"logits/real": 3.9518351554870605,
"logps/generated": -1418.6181640625,
"logps/real": -531.7501220703125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -375.73388671875,
"rewards/margins": 351.1172790527344,
"rewards/real": -24.616636276245117,
"step": 1530
},
{
"epoch": 5.6,
"grad_norm": 7.484860551860185e-19,
"learning_rate": 7.407407407407406e-09,
"logits/generated": 3.5700645446777344,
"logits/real": 3.8483328819274902,
"logps/generated": -1562.0670166015625,
"logps/real": -544.864501953125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -423.4938049316406,
"rewards/margins": 397.80877685546875,
"rewards/real": -25.68499755859375,
"step": 1540
},
{
"epoch": 5.636363636363637,
"grad_norm": 2.060032570194185e-26,
"learning_rate": 6.734006734006734e-09,
"logits/generated": 3.778414249420166,
"logits/real": 4.111809253692627,
"logps/generated": -1598.574951171875,
"logps/real": -540.5823364257812,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -392.5323486328125,
"rewards/margins": 368.44403076171875,
"rewards/real": -24.08827018737793,
"step": 1550
},
{
"epoch": 5.672727272727273,
"grad_norm": 2.891660272317984e-18,
"learning_rate": 6.060606060606061e-09,
"logits/generated": 3.6908371448516846,
"logits/real": 3.9306640625,
"logps/generated": -1735.0189208984375,
"logps/real": -525.3858032226562,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -403.2166748046875,
"rewards/margins": 380.7942199707031,
"rewards/real": -22.42244529724121,
"step": 1560
},
{
"epoch": 5.709090909090909,
"grad_norm": 0.5773051194749282,
"learning_rate": 5.387205387205387e-09,
"logits/generated": 3.379422426223755,
"logits/real": 3.869427442550659,
"logps/generated": -1555.796142578125,
"logps/real": -574.700927734375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -379.06866455078125,
"rewards/margins": 355.59228515625,
"rewards/real": -23.47635269165039,
"step": 1570
},
{
"epoch": 5.745454545454545,
"grad_norm": 3.9363255052487036e-30,
"learning_rate": 4.713804713804714e-09,
"logits/generated": 3.4176697731018066,
"logits/real": 3.742943525314331,
"logps/generated": -1533.4945068359375,
"logps/real": -517.671875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -388.6910705566406,
"rewards/margins": 369.216796875,
"rewards/real": -19.474323272705078,
"step": 1580
},
{
"epoch": 5.781818181818182,
"grad_norm": 1.674243115474082e-06,
"learning_rate": 4.0404040404040405e-09,
"logits/generated": 3.578691005706787,
"logits/real": 4.240370750427246,
"logps/generated": -1983.2786865234375,
"logps/real": -543.065185546875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -423.01593017578125,
"rewards/margins": 398.25390625,
"rewards/real": -24.76194953918457,
"step": 1590
},
{
"epoch": 5.818181818181818,
"grad_norm": 1.5404600960548044e-09,
"learning_rate": 3.367003367003367e-09,
"logits/generated": 3.7445895671844482,
"logits/real": 3.5675225257873535,
"logps/generated": -2012.541259765625,
"logps/real": -534.4871215820312,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -425.9609375,
"rewards/margins": 397.54046630859375,
"rewards/real": -28.42045021057129,
"step": 1600
},
{
"epoch": 5.8545454545454545,
"grad_norm": 2.3164162133427578e-17,
"learning_rate": 2.6936026936026934e-09,
"logits/generated": 3.759115219116211,
"logits/real": 3.735142946243286,
"logps/generated": -1591.992919921875,
"logps/real": -514.6450805664062,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -417.63653564453125,
"rewards/margins": 393.0400390625,
"rewards/real": -24.596487045288086,
"step": 1610
},
{
"epoch": 5.890909090909091,
"grad_norm": 3.533525435981294e-29,
"learning_rate": 2.0202020202020203e-09,
"logits/generated": 3.510359287261963,
"logits/real": 3.4505932331085205,
"logps/generated": -1413.510986328125,
"logps/real": -598.2730712890625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -406.50311279296875,
"rewards/margins": 370.1054992675781,
"rewards/real": -36.397682189941406,
"step": 1620
},
{
"epoch": 5.927272727272728,
"grad_norm": 1.793526514420287e-22,
"learning_rate": 1.3468013468013467e-09,
"logits/generated": 3.410717010498047,
"logits/real": 3.9544615745544434,
"logps/generated": -1549.203369140625,
"logps/real": -573.8199462890625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -372.239501953125,
"rewards/margins": 344.86798095703125,
"rewards/real": -27.37152099609375,
"step": 1630
},
{
"epoch": 5.963636363636364,
"grad_norm": 9.588588952918977e-23,
"learning_rate": 6.734006734006734e-10,
"logits/generated": 3.7320778369903564,
"logits/real": 4.073027610778809,
"logps/generated": -1287.1448974609375,
"logps/real": -473.1319885253906,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -355.93621826171875,
"rewards/margins": 333.669189453125,
"rewards/real": -22.26704978942871,
"step": 1640
},
{
"epoch": 6.0,
"grad_norm": 3.4581900765324e-15,
"learning_rate": 0.0,
"logits/generated": 3.713982105255127,
"logits/real": 3.768115997314453,
"logps/generated": -1897.5390625,
"logps/real": -579.6484375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/generated": -457.3140563964844,
"rewards/margins": 419.3667907714844,
"rewards/real": -37.947288513183594,
"step": 1650
},
{
"epoch": 6.0,
"step": 1650,
"total_flos": 0.0,
"train_loss": 0.16329280181607425,
"train_runtime": 21629.7145,
"train_samples_per_second": 4.882,
"train_steps_per_second": 0.076
}
],
"logging_steps": 10,
"max_steps": 1650,
"num_input_tokens_seen": 0,
"num_train_epochs": 6,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}