| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 6.0, |
| "eval_steps": 100, |
| "global_step": 1650, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0036363636363636364, |
| "grad_norm": 161867.6203244379, |
| "learning_rate": 6.06060606060606e-10, |
| "logits/generated": 3.250883102416992, |
| "logits/real": 3.3663041591644287, |
| "logps/generated": -1246.279052734375, |
| "logps/real": -577.5853881835938, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.0, |
| "rewards/generated": 0.0, |
| "rewards/margins": 0.0, |
| "rewards/real": 0.0, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.03636363636363636, |
| "grad_norm": 65759.84739536181, |
| "learning_rate": 6.060606060606061e-09, |
| "logits/generated": 3.811872720718384, |
| "logits/real": 3.2956764698028564, |
| "logps/generated": -1554.837158203125, |
| "logps/real": -466.0841064453125, |
| "loss": 7.4107, |
| "rewards/accuracies": 0.5416666865348816, |
| "rewards/generated": -13.777423858642578, |
| "rewards/margins": 11.481453895568848, |
| "rewards/real": -2.295969247817993, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.07272727272727272, |
| "grad_norm": 158533.00394960475, |
| "learning_rate": 1.2121212121212122e-08, |
| "logits/generated": 3.680910587310791, |
| "logits/real": 3.807919979095459, |
| "logps/generated": -1449.9287109375, |
| "logps/real": -537.6838989257812, |
| "loss": 8.3198, |
| "rewards/accuracies": 0.612500011920929, |
| "rewards/generated": -14.788434982299805, |
| "rewards/margins": 12.738945007324219, |
| "rewards/real": -2.0494906902313232, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.10909090909090909, |
| "grad_norm": 44559.67859277519, |
| "learning_rate": 1.818181818181818e-08, |
| "logits/generated": 3.533531904220581, |
| "logits/real": 4.045346736907959, |
| "logps/generated": -1542.246337890625, |
| "logps/real": -596.746337890625, |
| "loss": 5.2618, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/generated": -28.993526458740234, |
| "rewards/margins": 23.528636932373047, |
| "rewards/real": -5.464890956878662, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.14545454545454545, |
| "grad_norm": 24311.04160747661, |
| "learning_rate": 2.4242424242424243e-08, |
| "logits/generated": 3.2833874225616455, |
| "logits/real": 3.8217270374298096, |
| "logps/generated": -1298.0233154296875, |
| "logps/real": -616.4668579101562, |
| "loss": 2.1816, |
| "rewards/accuracies": 0.875, |
| "rewards/generated": -46.675140380859375, |
| "rewards/margins": 37.830352783203125, |
| "rewards/real": -8.844793319702148, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.18181818181818182, |
| "grad_norm": 9511.781352491278, |
| "learning_rate": 3.0303030303030305e-08, |
| "logits/generated": 3.39762544631958, |
| "logits/real": 4.15239143371582, |
| "logps/generated": -1378.459228515625, |
| "logps/real": -505.8817443847656, |
| "loss": 0.5522, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/generated": -107.13529205322266, |
| "rewards/margins": 85.6633529663086, |
| "rewards/real": -21.471942901611328, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.21818181818181817, |
| "grad_norm": 7819.324676656772, |
| "learning_rate": 3.636363636363636e-08, |
| "logits/generated": 3.292616605758667, |
| "logits/real": 3.9783637523651123, |
| "logps/generated": -1550.360107421875, |
| "logps/real": -536.5904541015625, |
| "loss": 0.4931, |
| "rewards/accuracies": 0.9624999761581421, |
| "rewards/generated": -114.218017578125, |
| "rewards/margins": 97.15935516357422, |
| "rewards/real": -17.058650970458984, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.2545454545454545, |
| "grad_norm": 5304.581511521464, |
| "learning_rate": 4.242424242424242e-08, |
| "logits/generated": 3.7238917350769043, |
| "logits/real": 3.9821434020996094, |
| "logps/generated": -1807.1488037109375, |
| "logps/real": -562.0470581054688, |
| "loss": 0.4211, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/generated": -135.7723388671875, |
| "rewards/margins": 114.70655822753906, |
| "rewards/real": -21.065773010253906, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.2909090909090909, |
| "grad_norm": 1090.3810930155685, |
| "learning_rate": 4.8484848484848486e-08, |
| "logits/generated": 4.115612506866455, |
| "logits/real": 3.496765613555908, |
| "logps/generated": -1947.6031494140625, |
| "logps/real": -505.0247497558594, |
| "loss": 0.2413, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/generated": -153.8606719970703, |
| "rewards/margins": 132.05270385742188, |
| "rewards/real": -21.80796241760254, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.32727272727272727, |
| "grad_norm": 0.0028666690842750952, |
| "learning_rate": 5.454545454545454e-08, |
| "logits/generated": 3.084749937057495, |
| "logits/real": 3.830709457397461, |
| "logps/generated": -1386.0224609375, |
| "logps/real": -544.002685546875, |
| "loss": 0.0918, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -187.54513549804688, |
| "rewards/margins": 168.12576293945312, |
| "rewards/real": -19.419374465942383, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.36363636363636365, |
| "grad_norm": 2548.034315563765, |
| "learning_rate": 6.060606060606061e-08, |
| "logits/generated": 3.542879581451416, |
| "logits/real": 3.966137647628784, |
| "logps/generated": -1731.931884765625, |
| "logps/real": -544.5482788085938, |
| "loss": 0.0298, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -183.2211151123047, |
| "rewards/margins": 162.5536346435547, |
| "rewards/real": -20.6674747467041, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 9125.456952119537, |
| "learning_rate": 6.666666666666665e-08, |
| "logits/generated": 3.551018476486206, |
| "logits/real": 4.005837440490723, |
| "logps/generated": -1565.452880859375, |
| "logps/real": -569.3267822265625, |
| "loss": 0.1962, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/generated": -193.08192443847656, |
| "rewards/margins": 174.71852111816406, |
| "rewards/real": -18.363407135009766, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.43636363636363634, |
| "grad_norm": 43.62923605034956, |
| "learning_rate": 7.272727272727273e-08, |
| "logits/generated": 4.0249834060668945, |
| "logits/real": 4.246652603149414, |
| "logps/generated": -1703.650634765625, |
| "logps/real": -472.86328125, |
| "loss": 0.1856, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -194.37936401367188, |
| "rewards/margins": 174.404052734375, |
| "rewards/real": -19.97530174255371, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.4727272727272727, |
| "grad_norm": 1.3324222147148856e-07, |
| "learning_rate": 7.878787878787878e-08, |
| "logits/generated": 3.621720790863037, |
| "logits/real": 4.0656514167785645, |
| "logps/generated": -1639.7611083984375, |
| "logps/real": -544.0973510742188, |
| "loss": 0.0656, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -217.6341094970703, |
| "rewards/margins": 197.27743530273438, |
| "rewards/real": -20.356674194335938, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.509090909090909, |
| "grad_norm": 7274.262763325892, |
| "learning_rate": 8.484848484848484e-08, |
| "logits/generated": 3.970414400100708, |
| "logits/real": 3.919184923171997, |
| "logps/generated": -1788.6624755859375, |
| "logps/real": -516.3016357421875, |
| "loss": 0.1568, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/generated": -210.6239471435547, |
| "rewards/margins": 190.55148315429688, |
| "rewards/real": -20.072477340698242, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.5454545454545454, |
| "grad_norm": 0.0002880250676340669, |
| "learning_rate": 9.09090909090909e-08, |
| "logits/generated": 3.3956267833709717, |
| "logits/real": 3.999126434326172, |
| "logps/generated": -1413.697021484375, |
| "logps/real": -508.56927490234375, |
| "loss": 0.1265, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -262.8182067871094, |
| "rewards/margins": 238.43539428710938, |
| "rewards/real": -24.3828182220459, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.5818181818181818, |
| "grad_norm": 8.356960095641936e-12, |
| "learning_rate": 9.696969696969697e-08, |
| "logits/generated": 3.9105753898620605, |
| "logits/real": 4.2755632400512695, |
| "logps/generated": -1530.80615234375, |
| "logps/real": -514.7744140625, |
| "loss": 0.4278, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -214.5561981201172, |
| "rewards/margins": 194.1567840576172, |
| "rewards/real": -20.399410247802734, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.6181818181818182, |
| "grad_norm": 2.1905714551153064e-05, |
| "learning_rate": 9.966329966329967e-08, |
| "logits/generated": 4.027784824371338, |
| "logits/real": 3.9190127849578857, |
| "logps/generated": -1677.461669921875, |
| "logps/real": -529.8884887695312, |
| "loss": 0.0288, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -222.0593719482422, |
| "rewards/margins": 200.66824340820312, |
| "rewards/real": -21.39112663269043, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.6545454545454545, |
| "grad_norm": 4554.734421731353, |
| "learning_rate": 9.898989898989899e-08, |
| "logits/generated": 3.6708030700683594, |
| "logits/real": 3.8833823204040527, |
| "logps/generated": -1624.985595703125, |
| "logps/real": -475.5914001464844, |
| "loss": 0.1764, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/generated": -234.6440887451172, |
| "rewards/margins": 217.81668090820312, |
| "rewards/real": -16.827428817749023, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.6909090909090909, |
| "grad_norm": 795.9220457751134, |
| "learning_rate": 9.83164983164983e-08, |
| "logits/generated": 3.776212692260742, |
| "logits/real": 3.720076084136963, |
| "logps/generated": -1932.6201171875, |
| "logps/real": -496.31768798828125, |
| "loss": 0.0109, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -242.80508422851562, |
| "rewards/margins": 221.8138427734375, |
| "rewards/real": -20.991230010986328, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.7272727272727273, |
| "grad_norm": 7.948293162415954e-09, |
| "learning_rate": 9.764309764309763e-08, |
| "logits/generated": 3.4761600494384766, |
| "logits/real": 3.7583975791931152, |
| "logps/generated": -1729.9085693359375, |
| "logps/real": -629.2281494140625, |
| "loss": 0.0175, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -243.0241241455078, |
| "rewards/margins": 225.57907104492188, |
| "rewards/real": -17.445043563842773, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.7636363636363637, |
| "grad_norm": 11854.565351399473, |
| "learning_rate": 9.696969696969697e-08, |
| "logits/generated": 3.1848533153533936, |
| "logits/real": 3.9498603343963623, |
| "logps/generated": -1566.5595703125, |
| "logps/real": -527.8526611328125, |
| "loss": 0.0562, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -251.7228546142578, |
| "rewards/margins": 224.41226196289062, |
| "rewards/real": -27.310577392578125, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 1.324414784532279e-09, |
| "learning_rate": 9.629629629629629e-08, |
| "logits/generated": 3.6429061889648438, |
| "logits/real": 4.131524085998535, |
| "logps/generated": -1664.998291015625, |
| "logps/real": -429.4391174316406, |
| "loss": 0.0953, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/generated": -265.4717712402344, |
| "rewards/margins": 244.45150756835938, |
| "rewards/real": -21.020299911499023, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.8363636363636363, |
| "grad_norm": 2516.386914307099, |
| "learning_rate": 9.562289562289561e-08, |
| "logits/generated": 3.362239360809326, |
| "logits/real": 4.2897210121154785, |
| "logps/generated": -1774.610107421875, |
| "logps/real": -527.5950317382812, |
| "loss": 0.0604, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -281.13580322265625, |
| "rewards/margins": 258.14569091796875, |
| "rewards/real": -22.99008560180664, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.8727272727272727, |
| "grad_norm": 4210.019236605774, |
| "learning_rate": 9.494949494949494e-08, |
| "logits/generated": 3.58394193649292, |
| "logits/real": 3.9923107624053955, |
| "logps/generated": -1617.4654541015625, |
| "logps/real": -498.33056640625, |
| "loss": 0.1084, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/generated": -241.74960327148438, |
| "rewards/margins": 225.633056640625, |
| "rewards/real": -16.116552352905273, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.9090909090909091, |
| "grad_norm": 3.992364816128141e-15, |
| "learning_rate": 9.427609427609428e-08, |
| "logits/generated": 3.3853213787078857, |
| "logits/real": 3.808389186859131, |
| "logps/generated": -1583.3477783203125, |
| "logps/real": -526.8497314453125, |
| "loss": 0.0632, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -252.41494750976562, |
| "rewards/margins": 236.5117645263672, |
| "rewards/real": -15.90319538116455, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.9454545454545454, |
| "grad_norm": 0.06379282101245505, |
| "learning_rate": 9.36026936026936e-08, |
| "logits/generated": 3.554725170135498, |
| "logits/real": 3.842238664627075, |
| "logps/generated": -1643.524658203125, |
| "logps/real": -493.2911682128906, |
| "loss": 0.0565, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -256.9104309082031, |
| "rewards/margins": 242.4754638671875, |
| "rewards/real": -14.434976577758789, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.9818181818181818, |
| "grad_norm": 2.1316518426288745e-15, |
| "learning_rate": 9.292929292929292e-08, |
| "logits/generated": 3.709350109100342, |
| "logits/real": 3.892772674560547, |
| "logps/generated": -1499.9554443359375, |
| "logps/real": -514.9131469726562, |
| "loss": 0.0007, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -262.5176086425781, |
| "rewards/margins": 242.5634002685547, |
| "rewards/real": -19.954198837280273, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.018181818181818, |
| "grad_norm": 2.144913603858407e-16, |
| "learning_rate": 9.225589225589225e-08, |
| "logits/generated": 3.527078151702881, |
| "logits/real": 4.176957130432129, |
| "logps/generated": -1430.626953125, |
| "logps/real": -487.687744140625, |
| "loss": 0.0475, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/generated": -263.19024658203125, |
| "rewards/margins": 248.2545166015625, |
| "rewards/real": -14.93572998046875, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.0545454545454545, |
| "grad_norm": 9.262988835020684e-16, |
| "learning_rate": 9.158249158249159e-08, |
| "logits/generated": 3.8879787921905518, |
| "logits/real": 3.7888145446777344, |
| "logps/generated": -1669.9515380859375, |
| "logps/real": -599.4093627929688, |
| "loss": 0.022, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -260.335693359375, |
| "rewards/margins": 241.45071411132812, |
| "rewards/real": -18.88497543334961, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.0909090909090908, |
| "grad_norm": 1.632491167864082e-15, |
| "learning_rate": 9.09090909090909e-08, |
| "logits/generated": 3.4893486499786377, |
| "logits/real": 4.106193542480469, |
| "logps/generated": -1454.445068359375, |
| "logps/real": -541.4019775390625, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -303.18951416015625, |
| "rewards/margins": 274.5284423828125, |
| "rewards/real": -28.661075592041016, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.1272727272727272, |
| "grad_norm": 1.3391428570009603e-26, |
| "learning_rate": 9.023569023569023e-08, |
| "logits/generated": 3.674029588699341, |
| "logits/real": 4.17787504196167, |
| "logps/generated": -1690.72265625, |
| "logps/real": -436.976318359375, |
| "loss": 0.0069, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -316.36273193359375, |
| "rewards/margins": 290.0094299316406, |
| "rewards/real": -26.353296279907227, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.1636363636363636, |
| "grad_norm": 0.00020046209172408023, |
| "learning_rate": 8.956228956228956e-08, |
| "logits/generated": 3.833738327026367, |
| "logits/real": 3.479259967803955, |
| "logps/generated": -1615.9827880859375, |
| "logps/real": -502.75750732421875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -280.38818359375, |
| "rewards/margins": 261.127685546875, |
| "rewards/real": -19.260507583618164, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 1.621254944519754e-22, |
| "learning_rate": 8.888888888888888e-08, |
| "logits/generated": 3.559558868408203, |
| "logits/real": 4.057076454162598, |
| "logps/generated": -1680.08203125, |
| "logps/real": -491.20831298828125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -297.2218017578125, |
| "rewards/margins": 271.63812255859375, |
| "rewards/real": -25.583667755126953, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.2363636363636363, |
| "grad_norm": 1.6991550894734641e-12, |
| "learning_rate": 8.821548821548821e-08, |
| "logits/generated": 3.3086647987365723, |
| "logits/real": 4.217398643493652, |
| "logps/generated": -1606.6771240234375, |
| "logps/real": -510.59234619140625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -320.9579772949219, |
| "rewards/margins": 285.43359375, |
| "rewards/real": -35.524383544921875, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.2727272727272727, |
| "grad_norm": 2.818660541318421e-27, |
| "learning_rate": 8.754208754208754e-08, |
| "logits/generated": 3.3664917945861816, |
| "logits/real": 4.047441005706787, |
| "logps/generated": -1408.769775390625, |
| "logps/real": -509.589111328125, |
| "loss": 0.1804, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -286.76177978515625, |
| "rewards/margins": 259.9476013183594, |
| "rewards/real": -26.814172744750977, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.309090909090909, |
| "grad_norm": 0.0007089825415081091, |
| "learning_rate": 8.686868686868686e-08, |
| "logits/generated": 3.5507493019104004, |
| "logits/real": 3.4634432792663574, |
| "logps/generated": -1446.5318603515625, |
| "logps/real": -544.6498413085938, |
| "loss": 0.1174, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/generated": -265.806396484375, |
| "rewards/margins": 238.1927032470703, |
| "rewards/real": -27.613704681396484, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.3454545454545455, |
| "grad_norm": 1.1305549325804168e-08, |
| "learning_rate": 8.619528619528619e-08, |
| "logits/generated": 3.628211259841919, |
| "logits/real": 3.5758774280548096, |
| "logps/generated": -1428.930908203125, |
| "logps/real": -562.1917114257812, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -255.12948608398438, |
| "rewards/margins": 239.59548950195312, |
| "rewards/real": -15.534004211425781, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.3818181818181818, |
| "grad_norm": 1.0350994829795507e-05, |
| "learning_rate": 8.552188552188552e-08, |
| "logits/generated": 3.7924466133117676, |
| "logits/real": 3.9518685340881348, |
| "logps/generated": -1539.1009521484375, |
| "logps/real": -527.0864868164062, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -264.60162353515625, |
| "rewards/margins": 240.48681640625, |
| "rewards/real": -24.114810943603516, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.4181818181818182, |
| "grad_norm": 0.36389886787684217, |
| "learning_rate": 8.484848484848484e-08, |
| "logits/generated": 3.520686626434326, |
| "logits/real": 4.119534492492676, |
| "logps/generated": -1430.095458984375, |
| "logps/real": -599.5445556640625, |
| "loss": 0.0589, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -281.20819091796875, |
| "rewards/margins": 268.37298583984375, |
| "rewards/real": -12.835179328918457, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.4545454545454546, |
| "grad_norm": 2.8324502759319933e-05, |
| "learning_rate": 8.417508417508418e-08, |
| "logits/generated": 3.5009918212890625, |
| "logits/real": 3.8314614295959473, |
| "logps/generated": -1441.1917724609375, |
| "logps/real": -531.7547607421875, |
| "loss": 0.0416, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -252.5347442626953, |
| "rewards/margins": 230.76644897460938, |
| "rewards/real": -21.7683162689209, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.490909090909091, |
| "grad_norm": 5094.3564893468065, |
| "learning_rate": 8.35016835016835e-08, |
| "logits/generated": 3.4917893409729004, |
| "logits/real": 3.65199613571167, |
| "logps/generated": -1599.940185546875, |
| "logps/real": -547.7235107421875, |
| "loss": 0.0251, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/generated": -271.635498046875, |
| "rewards/margins": 244.7548370361328, |
| "rewards/real": -26.880685806274414, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.5272727272727273, |
| "grad_norm": 6.718966014450532e-14, |
| "learning_rate": 8.282828282828282e-08, |
| "logits/generated": 3.1480331420898438, |
| "logits/real": 4.2137770652771, |
| "logps/generated": -1394.23095703125, |
| "logps/real": -533.8201904296875, |
| "loss": 0.0282, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -256.77789306640625, |
| "rewards/margins": 245.2924041748047, |
| "rewards/real": -11.485471725463867, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.5636363636363635, |
| "grad_norm": 1.1949041372307487e-08, |
| "learning_rate": 8.215488215488215e-08, |
| "logits/generated": 3.9812686443328857, |
| "logits/real": 3.8523590564727783, |
| "logps/generated": -1250.764404296875, |
| "logps/real": -469.63525390625, |
| "loss": 0.0252, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -210.96096801757812, |
| "rewards/margins": 207.98599243164062, |
| "rewards/real": -2.974966049194336, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 5.239520709960669e-07, |
| "learning_rate": 8.148148148148148e-08, |
| "logits/generated": 3.438265562057495, |
| "logits/real": 3.952139377593994, |
| "logps/generated": -1334.8717041015625, |
| "logps/real": -479.496337890625, |
| "loss": 0.039, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -214.3938751220703, |
| "rewards/margins": 214.3829345703125, |
| "rewards/real": -0.01094741839915514, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.6363636363636362, |
| "grad_norm": 1.3071416231646523e-15, |
| "learning_rate": 8.08080808080808e-08, |
| "logits/generated": 3.3865771293640137, |
| "logits/real": 3.869033098220825, |
| "logps/generated": -1464.185546875, |
| "logps/real": -487.26104736328125, |
| "loss": 0.0099, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -220.9183349609375, |
| "rewards/margins": 223.3875732421875, |
| "rewards/real": 2.469271183013916, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.6727272727272728, |
| "grad_norm": 5.809273751552379e-27, |
| "learning_rate": 8.013468013468013e-08, |
| "logits/generated": 3.5787856578826904, |
| "logits/real": 4.100471496582031, |
| "logps/generated": -1492.1751708984375, |
| "logps/real": -459.5709533691406, |
| "loss": 0.0005, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -271.4466247558594, |
| "rewards/margins": 263.6124572753906, |
| "rewards/real": -7.834146022796631, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.709090909090909, |
| "grad_norm": 1.099730928036333e-14, |
| "learning_rate": 7.946127946127946e-08, |
| "logits/generated": 3.8285858631134033, |
| "logits/real": 4.042223930358887, |
| "logps/generated": -1498.905029296875, |
| "logps/real": -461.4064025878906, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -297.97821044921875, |
| "rewards/margins": 278.54345703125, |
| "rewards/real": -19.434696197509766, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.7454545454545456, |
| "grad_norm": 1.0056219855024129e-26, |
| "learning_rate": 7.878787878787878e-08, |
| "logits/generated": 3.902775287628174, |
| "logits/real": 4.0818986892700195, |
| "logps/generated": -1425.9718017578125, |
| "logps/real": -498.4969787597656, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -269.1473083496094, |
| "rewards/margins": 257.8677062988281, |
| "rewards/real": -11.279605865478516, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.7818181818181817, |
| "grad_norm": 1.4824234977472797e-27, |
| "learning_rate": 7.811447811447811e-08, |
| "logits/generated": 3.4052627086639404, |
| "logits/real": 3.743426561355591, |
| "logps/generated": -1479.702392578125, |
| "logps/real": -476.9375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -264.81494140625, |
| "rewards/margins": 250.8484344482422, |
| "rewards/real": -13.966524124145508, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.8181818181818183, |
| "grad_norm": 0.0001152440497230488, |
| "learning_rate": 7.744107744107744e-08, |
| "logits/generated": 3.205287218093872, |
| "logits/real": 4.380854606628418, |
| "logps/generated": -1461.5018310546875, |
| "logps/real": -577.3456420898438, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -283.0616149902344, |
| "rewards/margins": 271.5289001464844, |
| "rewards/real": -11.532726287841797, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.8545454545454545, |
| "grad_norm": 9.961164548824944e-16, |
| "learning_rate": 7.676767676767677e-08, |
| "logits/generated": 3.8037960529327393, |
| "logits/real": 4.577506065368652, |
| "logps/generated": -1738.124755859375, |
| "logps/real": -426.62908935546875, |
| "loss": 0.0211, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/generated": -278.2953186035156, |
| "rewards/margins": 274.4579162597656, |
| "rewards/real": -3.8374037742614746, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.8909090909090909, |
| "grad_norm": 1.5112555232080375e-25, |
| "learning_rate": 7.609427609427609e-08, |
| "logits/generated": 3.249746799468994, |
| "logits/real": 4.040205955505371, |
| "logps/generated": -1564.7763671875, |
| "logps/real": -596.6712646484375, |
| "loss": 0.0082, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -282.82269287109375, |
| "rewards/margins": 289.08319091796875, |
| "rewards/real": 6.260525703430176, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.9272727272727272, |
| "grad_norm": 2.544045781036373e-31, |
| "learning_rate": 7.542087542087542e-08, |
| "logits/generated": 3.76676607131958, |
| "logits/real": 3.9748542308807373, |
| "logps/generated": -1476.042724609375, |
| "logps/real": -493.10223388671875, |
| "loss": 0.0039, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -213.38961791992188, |
| "rewards/margins": 221.9373779296875, |
| "rewards/real": 8.547750473022461, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.9636363636363636, |
| "grad_norm": 1.1070566434496538e-14, |
| "learning_rate": 7.474747474747475e-08, |
| "logits/generated": 3.9607677459716797, |
| "logits/real": 4.024069786071777, |
| "logps/generated": -1877.058837890625, |
| "logps/real": -491.959228515625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -255.7445068359375, |
| "rewards/margins": 262.62823486328125, |
| "rewards/real": 6.883699893951416, |
| "step": 540 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 961.3469663092699, |
| "learning_rate": 7.407407407407407e-08, |
| "logits/generated": 3.349459409713745, |
| "logits/real": 4.006588935852051, |
| "logps/generated": -1369.419677734375, |
| "logps/real": -464.02130126953125, |
| "loss": 0.0024, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -222.8209991455078, |
| "rewards/margins": 233.3080291748047, |
| "rewards/real": 10.487030029296875, |
| "step": 550 |
| }, |
| { |
| "epoch": 2.036363636363636, |
| "grad_norm": 9.057600032535732e-10, |
| "learning_rate": 7.34006734006734e-08, |
| "logits/generated": 3.6556448936462402, |
| "logits/real": 3.9517414569854736, |
| "logps/generated": -1621.7646484375, |
| "logps/real": -549.6907348632812, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -277.4305725097656, |
| "rewards/margins": 283.687255859375, |
| "rewards/real": 6.2567033767700195, |
| "step": 560 |
| }, |
| { |
| "epoch": 2.0727272727272728, |
| "grad_norm": 7.139382445177019e-20, |
| "learning_rate": 7.272727272727273e-08, |
| "logits/generated": 3.4937987327575684, |
| "logits/real": 4.038242340087891, |
| "logps/generated": -1512.7313232421875, |
| "logps/real": -530.453125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -251.084716796875, |
| "rewards/margins": 252.330810546875, |
| "rewards/real": 1.2460654973983765, |
| "step": 570 |
| }, |
| { |
| "epoch": 2.109090909090909, |
| "grad_norm": 3.2270331679056034e-19, |
| "learning_rate": 7.205387205387205e-08, |
| "logits/generated": 3.7572948932647705, |
| "logits/real": 4.378727912902832, |
| "logps/generated": -1412.48388671875, |
| "logps/real": -465.0306701660156, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -261.629638671875, |
| "rewards/margins": 273.5119323730469, |
| "rewards/real": 11.882287979125977, |
| "step": 580 |
| }, |
| { |
| "epoch": 2.1454545454545455, |
| "grad_norm": 4.775871749612275e-12, |
| "learning_rate": 7.138047138047138e-08, |
| "logits/generated": 3.9829063415527344, |
| "logits/real": 4.248471260070801, |
| "logps/generated": -1897.7740478515625, |
| "logps/real": -450.75146484375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -273.78240966796875, |
| "rewards/margins": 273.58770751953125, |
| "rewards/real": -0.1947340965270996, |
| "step": 590 |
| }, |
| { |
| "epoch": 2.1818181818181817, |
| "grad_norm": 0.43906412049943816, |
| "learning_rate": 7.070707070707071e-08, |
| "logits/generated": 3.9363341331481934, |
| "logits/real": 4.091122150421143, |
| "logps/generated": -1581.771728515625, |
| "logps/real": -453.38946533203125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -227.46475219726562, |
| "rewards/margins": 232.3105926513672, |
| "rewards/real": 4.845867156982422, |
| "step": 600 |
| }, |
| { |
| "epoch": 2.2181818181818183, |
| "grad_norm": 4.003271106180214e-16, |
| "learning_rate": 7.003367003367003e-08, |
| "logits/generated": 3.628199815750122, |
| "logits/real": 3.911200761795044, |
| "logps/generated": -1624.291015625, |
| "logps/real": -534.2811889648438, |
| "loss": 0.0017, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -305.51153564453125, |
| "rewards/margins": 298.5929870605469, |
| "rewards/real": -6.918566703796387, |
| "step": 610 |
| }, |
| { |
| "epoch": 2.2545454545454544, |
| "grad_norm": 4.825557199789743e-16, |
| "learning_rate": 6.936026936026935e-08, |
| "logits/generated": 4.049774646759033, |
| "logits/real": 4.147176265716553, |
| "logps/generated": -1385.0206298828125, |
| "logps/real": -495.1424255371094, |
| "loss": 0.0062, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -254.9941864013672, |
| "rewards/margins": 243.9451141357422, |
| "rewards/real": -11.049083709716797, |
| "step": 620 |
| }, |
| { |
| "epoch": 2.290909090909091, |
| "grad_norm": 9.384413995054146e-15, |
| "learning_rate": 6.868686868686869e-08, |
| "logits/generated": 3.82362699508667, |
| "logits/real": 3.9312844276428223, |
| "logps/generated": -1712.203125, |
| "logps/real": -509.4066467285156, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -359.2261657714844, |
| "rewards/margins": 337.8033752441406, |
| "rewards/real": -21.422758102416992, |
| "step": 630 |
| }, |
| { |
| "epoch": 2.327272727272727, |
| "grad_norm": 1.6226130118471855e-20, |
| "learning_rate": 6.801346801346801e-08, |
| "logits/generated": 3.2555956840515137, |
| "logits/real": 3.4989371299743652, |
| "logps/generated": -1550.715087890625, |
| "logps/real": -534.7842407226562, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -362.5604553222656, |
| "rewards/margins": 341.76409912109375, |
| "rewards/real": -20.79628562927246, |
| "step": 640 |
| }, |
| { |
| "epoch": 2.3636363636363638, |
| "grad_norm": 2445.668734526786, |
| "learning_rate": 6.734006734006734e-08, |
| "logits/generated": 4.016414642333984, |
| "logits/real": 4.205541133880615, |
| "logps/generated": -1622.1700439453125, |
| "logps/real": -519.1929321289062, |
| "loss": 0.0011, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -359.9587097167969, |
| "rewards/margins": 332.77911376953125, |
| "rewards/real": -27.17962646484375, |
| "step": 650 |
| }, |
| { |
| "epoch": 2.4, |
| "grad_norm": 0.0, |
| "learning_rate": 6.666666666666665e-08, |
| "logits/generated": 3.48205304145813, |
| "logits/real": 3.7544798851013184, |
| "logps/generated": -1831.762451171875, |
| "logps/real": -557.1226806640625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -348.6358947753906, |
| "rewards/margins": 326.20648193359375, |
| "rewards/real": -22.429412841796875, |
| "step": 660 |
| }, |
| { |
| "epoch": 2.4363636363636365, |
| "grad_norm": 1.5853853765821598e-23, |
| "learning_rate": 6.5993265993266e-08, |
| "logits/generated": 3.557918071746826, |
| "logits/real": 3.428406238555908, |
| "logps/generated": -1539.081298828125, |
| "logps/real": -518.9280395507812, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -370.35260009765625, |
| "rewards/margins": 344.65423583984375, |
| "rewards/real": -25.69829750061035, |
| "step": 670 |
| }, |
| { |
| "epoch": 2.4727272727272727, |
| "grad_norm": 3.430128294202441e-08, |
| "learning_rate": 6.531986531986532e-08, |
| "logits/generated": 3.484684467315674, |
| "logits/real": 4.016026496887207, |
| "logps/generated": -1661.240478515625, |
| "logps/real": -544.1554565429688, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -373.469970703125, |
| "rewards/margins": 352.1344909667969, |
| "rewards/real": -21.33551597595215, |
| "step": 680 |
| }, |
| { |
| "epoch": 2.509090909090909, |
| "grad_norm": 0.0, |
| "learning_rate": 6.464646464646465e-08, |
| "logits/generated": 3.8870773315429688, |
| "logits/real": 4.1783223152160645, |
| "logps/generated": -1835.35546875, |
| "logps/real": -525.7906494140625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -370.15228271484375, |
| "rewards/margins": 348.29742431640625, |
| "rewards/real": -21.854822158813477, |
| "step": 690 |
| }, |
| { |
| "epoch": 2.5454545454545454, |
| "grad_norm": 1.485520291302884e-17, |
| "learning_rate": 6.397306397306396e-08, |
| "logits/generated": 3.5036914348602295, |
| "logits/real": 3.7754790782928467, |
| "logps/generated": -1453.15087890625, |
| "logps/real": -552.2473754882812, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -319.9031982421875, |
| "rewards/margins": 297.2294006347656, |
| "rewards/real": -22.673809051513672, |
| "step": 700 |
| }, |
| { |
| "epoch": 2.581818181818182, |
| "grad_norm": 9925.761806176399, |
| "learning_rate": 6.32996632996633e-08, |
| "logits/generated": 3.4248390197753906, |
| "logits/real": 4.034340858459473, |
| "logps/generated": -1423.414306640625, |
| "logps/real": -578.4390258789062, |
| "loss": 0.0493, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/generated": -330.52459716796875, |
| "rewards/margins": 309.222412109375, |
| "rewards/real": -21.302127838134766, |
| "step": 710 |
| }, |
| { |
| "epoch": 2.618181818181818, |
| "grad_norm": 5.679867918386259e-29, |
| "learning_rate": 6.262626262626263e-08, |
| "logits/generated": 3.6940674781799316, |
| "logits/real": 4.145870685577393, |
| "logps/generated": -1283.764404296875, |
| "logps/real": -489.2547302246094, |
| "loss": 0.0185, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -298.9286193847656, |
| "rewards/margins": 290.1479797363281, |
| "rewards/real": -8.78062915802002, |
| "step": 720 |
| }, |
| { |
| "epoch": 2.6545454545454543, |
| "grad_norm": 7.526005825335279e-20, |
| "learning_rate": 6.195286195286194e-08, |
| "logits/generated": 3.155702590942383, |
| "logits/real": 3.7637996673583984, |
| "logps/generated": -1467.9083251953125, |
| "logps/real": -546.5178833007812, |
| "loss": 0.0168, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -369.35992431640625, |
| "rewards/margins": 345.2546081542969, |
| "rewards/real": -24.10533332824707, |
| "step": 730 |
| }, |
| { |
| "epoch": 2.690909090909091, |
| "grad_norm": 8.57224717026253e-28, |
| "learning_rate": 6.127946127946127e-08, |
| "logits/generated": 3.4195969104766846, |
| "logits/real": 3.9820468425750732, |
| "logps/generated": -1553.6126708984375, |
| "logps/real": -564.40087890625, |
| "loss": 0.0008, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -379.32421875, |
| "rewards/margins": 362.53631591796875, |
| "rewards/real": -16.787899017333984, |
| "step": 740 |
| }, |
| { |
| "epoch": 2.7272727272727275, |
| "grad_norm": 2.1648519421116784e-24, |
| "learning_rate": 6.060606060606061e-08, |
| "logits/generated": 3.6250579357147217, |
| "logits/real": 4.063885688781738, |
| "logps/generated": -1501.2998046875, |
| "logps/real": -615.9197998046875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -342.18902587890625, |
| "rewards/margins": 327.02435302734375, |
| "rewards/real": -15.164652824401855, |
| "step": 750 |
| }, |
| { |
| "epoch": 2.7636363636363637, |
| "grad_norm": 1.4069459552366719e-24, |
| "learning_rate": 5.993265993265994e-08, |
| "logits/generated": 3.341201066970825, |
| "logits/real": 3.9008383750915527, |
| "logps/generated": -1905.3343505859375, |
| "logps/real": -511.04498291015625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -409.7513732910156, |
| "rewards/margins": 391.1456604003906, |
| "rewards/real": -18.605722427368164, |
| "step": 760 |
| }, |
| { |
| "epoch": 2.8, |
| "grad_norm": 3.0242780956024674e-07, |
| "learning_rate": 5.925925925925925e-08, |
| "logits/generated": 4.076157569885254, |
| "logits/real": 3.7209954261779785, |
| "logps/generated": -1816.0787353515625, |
| "logps/real": -485.0223693847656, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -351.87799072265625, |
| "rewards/margins": 337.22857666015625, |
| "rewards/real": -14.649354934692383, |
| "step": 770 |
| }, |
| { |
| "epoch": 2.8363636363636364, |
| "grad_norm": 1.2592245187280255e-18, |
| "learning_rate": 5.8585858585858584e-08, |
| "logits/generated": 3.7142958641052246, |
| "logits/real": 4.193110466003418, |
| "logps/generated": -1846.4498291015625, |
| "logps/real": -518.27392578125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -395.49456787109375, |
| "rewards/margins": 373.5671081542969, |
| "rewards/real": -21.927494049072266, |
| "step": 780 |
| }, |
| { |
| "epoch": 2.8727272727272726, |
| "grad_norm": 1.5298202167064291e-19, |
| "learning_rate": 5.791245791245791e-08, |
| "logits/generated": 3.505951404571533, |
| "logits/real": 4.056126117706299, |
| "logps/generated": -1407.225341796875, |
| "logps/real": -542.5260620117188, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -360.354248046875, |
| "rewards/margins": 346.49407958984375, |
| "rewards/real": -13.860153198242188, |
| "step": 790 |
| }, |
| { |
| "epoch": 2.909090909090909, |
| "grad_norm": 7.66403093195095e-35, |
| "learning_rate": 5.723905723905723e-08, |
| "logits/generated": 3.39953875541687, |
| "logits/real": 3.984081268310547, |
| "logps/generated": -1458.378173828125, |
| "logps/real": -513.1370849609375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -357.29925537109375, |
| "rewards/margins": 334.2768249511719, |
| "rewards/real": -23.022445678710938, |
| "step": 800 |
| }, |
| { |
| "epoch": 2.9454545454545453, |
| "grad_norm": 2.1123843398831784e-30, |
| "learning_rate": 5.6565656565656564e-08, |
| "logits/generated": 3.5056838989257812, |
| "logits/real": 3.719235897064209, |
| "logps/generated": -1713.8677978515625, |
| "logps/real": -522.7982177734375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -361.1312561035156, |
| "rewards/margins": 344.48333740234375, |
| "rewards/real": -16.64789390563965, |
| "step": 810 |
| }, |
| { |
| "epoch": 2.981818181818182, |
| "grad_norm": 8.833278672851137e-20, |
| "learning_rate": 5.589225589225589e-08, |
| "logits/generated": 3.532017469406128, |
| "logits/real": 4.0720086097717285, |
| "logps/generated": -1373.71630859375, |
| "logps/real": -477.915771484375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -389.9593505859375, |
| "rewards/margins": 374.3216857910156, |
| "rewards/real": -15.637689590454102, |
| "step": 820 |
| }, |
| { |
| "epoch": 3.018181818181818, |
| "grad_norm": 1.829241516518958e-07, |
| "learning_rate": 5.521885521885522e-08, |
| "logits/generated": 3.942246198654175, |
| "logits/real": 4.765660285949707, |
| "logps/generated": -1411.296875, |
| "logps/real": -509.1822204589844, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -349.03863525390625, |
| "rewards/margins": 333.57861328125, |
| "rewards/real": -15.460012435913086, |
| "step": 830 |
| }, |
| { |
| "epoch": 3.0545454545454547, |
| "grad_norm": 7.515486186427604e-28, |
| "learning_rate": 5.454545454545454e-08, |
| "logits/generated": 3.83001708984375, |
| "logits/real": 4.423516273498535, |
| "logps/generated": -1518.19921875, |
| "logps/real": -510.94927978515625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -343.0306396484375, |
| "rewards/margins": 320.9806213378906, |
| "rewards/real": -22.049989700317383, |
| "step": 840 |
| }, |
| { |
| "epoch": 3.090909090909091, |
| "grad_norm": 1.0019439073528928e-27, |
| "learning_rate": 5.387205387205387e-08, |
| "logits/generated": 3.4515597820281982, |
| "logits/real": 3.6501259803771973, |
| "logps/generated": -1589.603759765625, |
| "logps/real": -517.35205078125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -361.349853515625, |
| "rewards/margins": 343.94830322265625, |
| "rewards/real": -17.401607513427734, |
| "step": 850 |
| }, |
| { |
| "epoch": 3.1272727272727274, |
| "grad_norm": 5.331701377808202e-19, |
| "learning_rate": 5.31986531986532e-08, |
| "logits/generated": 3.70512056350708, |
| "logits/real": 3.7677884101867676, |
| "logps/generated": -1549.2159423828125, |
| "logps/real": -446.04559326171875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -370.1104736328125, |
| "rewards/margins": 347.8051452636719, |
| "rewards/real": -22.305322647094727, |
| "step": 860 |
| }, |
| { |
| "epoch": 3.1636363636363636, |
| "grad_norm": 1.678736301504087e-11, |
| "learning_rate": 5.2525252525252525e-08, |
| "logits/generated": 4.319781303405762, |
| "logits/real": 3.8676934242248535, |
| "logps/generated": -2044.4833984375, |
| "logps/real": -440.07586669921875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -390.758056640625, |
| "rewards/margins": 374.1348571777344, |
| "rewards/real": -16.623199462890625, |
| "step": 870 |
| }, |
| { |
| "epoch": 3.2, |
| "grad_norm": 6.192331870232493e-20, |
| "learning_rate": 5.1851851851851846e-08, |
| "logits/generated": 3.774130344390869, |
| "logits/real": 3.9397501945495605, |
| "logps/generated": -1745.0888671875, |
| "logps/real": -427.6468811035156, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -348.3378601074219, |
| "rewards/margins": 329.65423583984375, |
| "rewards/real": -18.683679580688477, |
| "step": 880 |
| }, |
| { |
| "epoch": 3.2363636363636363, |
| "grad_norm": 4.132428468024897e-32, |
| "learning_rate": 5.117845117845118e-08, |
| "logits/generated": 3.667954921722412, |
| "logits/real": 3.71317982673645, |
| "logps/generated": -1767.097412109375, |
| "logps/real": -496.12274169921875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -374.12109375, |
| "rewards/margins": 357.9183044433594, |
| "rewards/real": -16.202762603759766, |
| "step": 890 |
| }, |
| { |
| "epoch": 3.2727272727272725, |
| "grad_norm": 1.6819218744361375e-14, |
| "learning_rate": 5.0505050505050506e-08, |
| "logits/generated": 3.7238926887512207, |
| "logits/real": 4.136239051818848, |
| "logps/generated": -1723.2008056640625, |
| "logps/real": -534.2486572265625, |
| "loss": 0.0403, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -420.83453369140625, |
| "rewards/margins": 403.14373779296875, |
| "rewards/real": -17.690826416015625, |
| "step": 900 |
| }, |
| { |
| "epoch": 3.309090909090909, |
| "grad_norm": 1.075818913331155e-28, |
| "learning_rate": 4.983164983164983e-08, |
| "logits/generated": 3.950953245162964, |
| "logits/real": 4.087274074554443, |
| "logps/generated": -1642.2506103515625, |
| "logps/real": -428.4207458496094, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -393.48870849609375, |
| "rewards/margins": 367.9131774902344, |
| "rewards/real": -25.575532913208008, |
| "step": 910 |
| }, |
| { |
| "epoch": 3.3454545454545457, |
| "grad_norm": 1.8614782975594385e-13, |
| "learning_rate": 4.915824915824915e-08, |
| "logits/generated": 3.6677818298339844, |
| "logits/real": 4.1127095222473145, |
| "logps/generated": -1344.891845703125, |
| "logps/real": -514.3883056640625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -368.61151123046875, |
| "rewards/margins": 346.4088439941406, |
| "rewards/real": -22.202638626098633, |
| "step": 920 |
| }, |
| { |
| "epoch": 3.381818181818182, |
| "grad_norm": 3.381691941063049e-08, |
| "learning_rate": 4.8484848484848486e-08, |
| "logits/generated": 3.5154712200164795, |
| "logits/real": 3.966923236846924, |
| "logps/generated": -1584.695068359375, |
| "logps/real": -515.33984375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -360.78082275390625, |
| "rewards/margins": 340.842529296875, |
| "rewards/real": -19.938283920288086, |
| "step": 930 |
| }, |
| { |
| "epoch": 3.418181818181818, |
| "grad_norm": 0.0, |
| "learning_rate": 4.781144781144781e-08, |
| "logits/generated": 3.2723331451416016, |
| "logits/real": 4.094303131103516, |
| "logps/generated": -1407.6539306640625, |
| "logps/real": -563.6630859375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -374.88385009765625, |
| "rewards/margins": 347.38189697265625, |
| "rewards/real": -27.50186538696289, |
| "step": 940 |
| }, |
| { |
| "epoch": 3.4545454545454546, |
| "grad_norm": 3.9897612503453264e-26, |
| "learning_rate": 4.713804713804714e-08, |
| "logits/generated": 3.8031222820281982, |
| "logits/real": 4.582549095153809, |
| "logps/generated": -1604.9886474609375, |
| "logps/real": -469.01214599609375, |
| "loss": 0.0004, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -366.40118408203125, |
| "rewards/margins": 351.3585510253906, |
| "rewards/real": -15.042654037475586, |
| "step": 950 |
| }, |
| { |
| "epoch": 3.4909090909090907, |
| "grad_norm": 1.1807088805922945e-20, |
| "learning_rate": 4.646464646464646e-08, |
| "logits/generated": 3.4880664348602295, |
| "logits/real": 3.898571014404297, |
| "logps/generated": -1486.0142822265625, |
| "logps/real": -506.51531982421875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -352.26165771484375, |
| "rewards/margins": 333.8659973144531, |
| "rewards/real": -18.395660400390625, |
| "step": 960 |
| }, |
| { |
| "epoch": 3.5272727272727273, |
| "grad_norm": 2.4268936271701313e-18, |
| "learning_rate": 4.5791245791245794e-08, |
| "logits/generated": 3.4788355827331543, |
| "logits/real": 3.822808027267456, |
| "logps/generated": -1364.0, |
| "logps/real": -505.9129333496094, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -365.39385986328125, |
| "rewards/margins": 347.33087158203125, |
| "rewards/real": -18.06302833557129, |
| "step": 970 |
| }, |
| { |
| "epoch": 3.5636363636363635, |
| "grad_norm": 0.0, |
| "learning_rate": 4.5117845117845114e-08, |
| "logits/generated": 4.036175727844238, |
| "logits/real": 3.6656670570373535, |
| "logps/generated": -1476.5787353515625, |
| "logps/real": -500.7046813964844, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -350.3294982910156, |
| "rewards/margins": 334.7972412109375, |
| "rewards/real": -15.532255172729492, |
| "step": 980 |
| }, |
| { |
| "epoch": 3.6, |
| "grad_norm": 3.456223568001992e-30, |
| "learning_rate": 4.444444444444444e-08, |
| "logits/generated": 3.490968704223633, |
| "logits/real": 3.8967537879943848, |
| "logps/generated": -1640.80859375, |
| "logps/real": -535.5618896484375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -407.5831298828125, |
| "rewards/margins": 386.76226806640625, |
| "rewards/real": -20.820871353149414, |
| "step": 990 |
| }, |
| { |
| "epoch": 3.6363636363636362, |
| "grad_norm": 2.4055459783409575e-31, |
| "learning_rate": 4.377104377104377e-08, |
| "logits/generated": 3.8550515174865723, |
| "logits/real": 3.984800338745117, |
| "logps/generated": -1491.381591796875, |
| "logps/real": -536.5324096679688, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -340.267822265625, |
| "rewards/margins": 324.9073791503906, |
| "rewards/real": -15.3604736328125, |
| "step": 1000 |
| }, |
| { |
| "epoch": 3.672727272727273, |
| "grad_norm": 8.548157838153945e-25, |
| "learning_rate": 4.3097643097643095e-08, |
| "logits/generated": 3.985334873199463, |
| "logits/real": 4.183938026428223, |
| "logps/generated": -2043.4547119140625, |
| "logps/real": -523.0987548828125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -398.6493225097656, |
| "rewards/margins": 380.72906494140625, |
| "rewards/real": -17.920269012451172, |
| "step": 1010 |
| }, |
| { |
| "epoch": 3.709090909090909, |
| "grad_norm": 3.0175261180501287e-07, |
| "learning_rate": 4.242424242424242e-08, |
| "logits/generated": 3.4043655395507812, |
| "logits/real": 4.124855041503906, |
| "logps/generated": -1469.7431640625, |
| "logps/real": -555.0632934570312, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -387.6515808105469, |
| "rewards/margins": 371.17645263671875, |
| "rewards/real": -16.475154876708984, |
| "step": 1020 |
| }, |
| { |
| "epoch": 3.7454545454545456, |
| "grad_norm": 0.0, |
| "learning_rate": 4.175084175084175e-08, |
| "logits/generated": 3.7843337059020996, |
| "logits/real": 4.101785182952881, |
| "logps/generated": -1506.9027099609375, |
| "logps/real": -522.604736328125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -355.43914794921875, |
| "rewards/margins": 334.7210693359375, |
| "rewards/real": -20.71808624267578, |
| "step": 1030 |
| }, |
| { |
| "epoch": 3.7818181818181817, |
| "grad_norm": 2.4111652226040675e-26, |
| "learning_rate": 4.1077441077441075e-08, |
| "logits/generated": 3.670746326446533, |
| "logits/real": 3.747389554977417, |
| "logps/generated": -1680.3013916015625, |
| "logps/real": -483.5108947753906, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -358.0954895019531, |
| "rewards/margins": 347.24761962890625, |
| "rewards/real": -10.84788703918457, |
| "step": 1040 |
| }, |
| { |
| "epoch": 3.8181818181818183, |
| "grad_norm": 1.2074440559893782e-13, |
| "learning_rate": 4.04040404040404e-08, |
| "logits/generated": 3.5393459796905518, |
| "logits/real": 4.060755729675293, |
| "logps/generated": -1671.9293212890625, |
| "logps/real": -480.4324645996094, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -372.8212585449219, |
| "rewards/margins": 355.241455078125, |
| "rewards/real": -17.579803466796875, |
| "step": 1050 |
| }, |
| { |
| "epoch": 3.8545454545454545, |
| "grad_norm": 2.0246114885936573e-34, |
| "learning_rate": 3.973063973063973e-08, |
| "logits/generated": 4.0824480056762695, |
| "logits/real": 3.579103946685791, |
| "logps/generated": -1707.8812255859375, |
| "logps/real": -445.0042419433594, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -417.997802734375, |
| "rewards/margins": 405.29876708984375, |
| "rewards/real": -12.699002265930176, |
| "step": 1060 |
| }, |
| { |
| "epoch": 3.8909090909090907, |
| "grad_norm": 3.210144860679204e-17, |
| "learning_rate": 3.9057239057239056e-08, |
| "logits/generated": 3.7562339305877686, |
| "logits/real": 4.343286037445068, |
| "logps/generated": -1494.666015625, |
| "logps/real": -505.9931640625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -367.2537536621094, |
| "rewards/margins": 357.6014099121094, |
| "rewards/real": -9.65237808227539, |
| "step": 1070 |
| }, |
| { |
| "epoch": 3.9272727272727272, |
| "grad_norm": 2.9805943462413785e-15, |
| "learning_rate": 3.838383838383838e-08, |
| "logits/generated": 3.5484557151794434, |
| "logits/real": 4.100916862487793, |
| "logps/generated": -1519.9788818359375, |
| "logps/real": -524.9559936523438, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -342.6521911621094, |
| "rewards/margins": 327.91302490234375, |
| "rewards/real": -14.739137649536133, |
| "step": 1080 |
| }, |
| { |
| "epoch": 3.963636363636364, |
| "grad_norm": 0.0, |
| "learning_rate": 3.771043771043771e-08, |
| "logits/generated": 3.668537139892578, |
| "logits/real": 3.8592376708984375, |
| "logps/generated": -1508.808349609375, |
| "logps/real": -527.437744140625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -353.4441833496094, |
| "rewards/margins": 339.4230651855469, |
| "rewards/real": -14.0211181640625, |
| "step": 1090 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.5006449032805113, |
| "learning_rate": 3.7037037037037036e-08, |
| "logits/generated": 3.732149600982666, |
| "logits/real": 3.8971447944641113, |
| "logps/generated": -1593.047607421875, |
| "logps/real": -482.2001037597656, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -367.11297607421875, |
| "rewards/margins": 348.68048095703125, |
| "rewards/real": -18.43246078491211, |
| "step": 1100 |
| }, |
| { |
| "epoch": 4.036363636363636, |
| "grad_norm": 2.814433616854974e-27, |
| "learning_rate": 3.636363636363636e-08, |
| "logits/generated": 3.4156410694122314, |
| "logits/real": 4.165556907653809, |
| "logps/generated": -1596.3184814453125, |
| "logps/real": -573.5258178710938, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -373.71844482421875, |
| "rewards/margins": 361.01336669921875, |
| "rewards/real": -12.705034255981445, |
| "step": 1110 |
| }, |
| { |
| "epoch": 4.072727272727272, |
| "grad_norm": 8.319973607571124e-15, |
| "learning_rate": 3.569023569023569e-08, |
| "logits/generated": 3.273974895477295, |
| "logits/real": 4.1000566482543945, |
| "logps/generated": -1501.887451171875, |
| "logps/real": -576.9967041015625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -380.444580078125, |
| "rewards/margins": 365.85614013671875, |
| "rewards/real": -14.588480949401855, |
| "step": 1120 |
| }, |
| { |
| "epoch": 4.109090909090909, |
| "grad_norm": 1.0017792103000478e-25, |
| "learning_rate": 3.501683501683502e-08, |
| "logits/generated": 3.6438803672790527, |
| "logits/real": 3.937204360961914, |
| "logps/generated": -1560.591064453125, |
| "logps/real": -508.82891845703125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -353.357666015625, |
| "rewards/margins": 336.72894287109375, |
| "rewards/real": -16.628740310668945, |
| "step": 1130 |
| }, |
| { |
| "epoch": 4.1454545454545455, |
| "grad_norm": 2.4734257256576104e-16, |
| "learning_rate": 3.4343434343434344e-08, |
| "logits/generated": 3.737811326980591, |
| "logits/real": 4.338569641113281, |
| "logps/generated": -1994.470947265625, |
| "logps/real": -508.37786865234375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -393.4563293457031, |
| "rewards/margins": 373.7255554199219, |
| "rewards/real": -19.73076820373535, |
| "step": 1140 |
| }, |
| { |
| "epoch": 4.181818181818182, |
| "grad_norm": 4.238864478180666e-30, |
| "learning_rate": 3.367003367003367e-08, |
| "logits/generated": 3.8650360107421875, |
| "logits/real": 4.163536071777344, |
| "logps/generated": -1378.536376953125, |
| "logps/real": -512.680419921875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -349.7162780761719, |
| "rewards/margins": 331.3489685058594, |
| "rewards/real": -18.367273330688477, |
| "step": 1150 |
| }, |
| { |
| "epoch": 4.218181818181818, |
| "grad_norm": 2.4861501835665235e-19, |
| "learning_rate": 3.2996632996633e-08, |
| "logits/generated": 3.3802618980407715, |
| "logits/real": 4.011307716369629, |
| "logps/generated": -1465.8193359375, |
| "logps/real": -574.6527099609375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -379.3877868652344, |
| "rewards/margins": 363.7222595214844, |
| "rewards/real": -15.665545463562012, |
| "step": 1160 |
| }, |
| { |
| "epoch": 4.254545454545455, |
| "grad_norm": 6.086185442066954e-31, |
| "learning_rate": 3.2323232323232324e-08, |
| "logits/generated": 3.559816360473633, |
| "logits/real": 4.023754119873047, |
| "logps/generated": -1803.6634521484375, |
| "logps/real": -554.5623168945312, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -388.7065734863281, |
| "rewards/margins": 371.402587890625, |
| "rewards/real": -17.303974151611328, |
| "step": 1170 |
| }, |
| { |
| "epoch": 4.290909090909091, |
| "grad_norm": 7.871786797978349e-27, |
| "learning_rate": 3.164983164983165e-08, |
| "logits/generated": 3.4312922954559326, |
| "logits/real": 3.8822197914123535, |
| "logps/generated": -1608.0223388671875, |
| "logps/real": -601.10302734375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -379.69024658203125, |
| "rewards/margins": 345.4664611816406, |
| "rewards/real": -34.223793029785156, |
| "step": 1180 |
| }, |
| { |
| "epoch": 4.327272727272727, |
| "grad_norm": 2.3723518217219765e-22, |
| "learning_rate": 3.097643097643097e-08, |
| "logits/generated": 3.6023402214050293, |
| "logits/real": 4.428885459899902, |
| "logps/generated": -1807.38671875, |
| "logps/real": -493.84429931640625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -422.08502197265625, |
| "rewards/margins": 390.57745361328125, |
| "rewards/real": -31.507583618164062, |
| "step": 1190 |
| }, |
| { |
| "epoch": 4.363636363636363, |
| "grad_norm": 2.433404900713148e-21, |
| "learning_rate": 3.0303030303030305e-08, |
| "logits/generated": 3.514697551727295, |
| "logits/real": 3.9592909812927246, |
| "logps/generated": -1493.529541015625, |
| "logps/real": -601.3435668945312, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -357.14764404296875, |
| "rewards/margins": 331.7057189941406, |
| "rewards/real": -25.441919326782227, |
| "step": 1200 |
| }, |
| { |
| "epoch": 4.4, |
| "grad_norm": 0.0, |
| "learning_rate": 2.9629629629629625e-08, |
| "logits/generated": 3.5242507457733154, |
| "logits/real": 3.908127546310425, |
| "logps/generated": -1402.09716796875, |
| "logps/real": -534.2159423828125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -397.23370361328125, |
| "rewards/margins": 370.5049743652344, |
| "rewards/real": -26.72869873046875, |
| "step": 1210 |
| }, |
| { |
| "epoch": 4.4363636363636365, |
| "grad_norm": 6.204048985218319e-20, |
| "learning_rate": 2.8956228956228955e-08, |
| "logits/generated": 3.7267520427703857, |
| "logits/real": 4.163025379180908, |
| "logps/generated": -1701.4326171875, |
| "logps/real": -520.2593383789062, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -418.1678771972656, |
| "rewards/margins": 393.18682861328125, |
| "rewards/real": -24.981050491333008, |
| "step": 1220 |
| }, |
| { |
| "epoch": 4.472727272727273, |
| "grad_norm": 1.9021979406608815e-23, |
| "learning_rate": 2.8282828282828282e-08, |
| "logits/generated": 3.478511095046997, |
| "logits/real": 4.237907409667969, |
| "logps/generated": -1657.092041015625, |
| "logps/real": -518.4788818359375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -415.843505859375, |
| "rewards/margins": 392.0099792480469, |
| "rewards/real": -23.83350372314453, |
| "step": 1230 |
| }, |
| { |
| "epoch": 4.509090909090909, |
| "grad_norm": 3.7469204488575616e-32, |
| "learning_rate": 2.760942760942761e-08, |
| "logits/generated": 3.6705174446105957, |
| "logits/real": 3.9910926818847656, |
| "logps/generated": -1534.4541015625, |
| "logps/real": -463.1044006347656, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -420.45538330078125, |
| "rewards/margins": 390.87957763671875, |
| "rewards/real": -29.57583236694336, |
| "step": 1240 |
| }, |
| { |
| "epoch": 4.545454545454545, |
| "grad_norm": 0.0, |
| "learning_rate": 2.6936026936026936e-08, |
| "logits/generated": 3.7410473823547363, |
| "logits/real": 4.114706993103027, |
| "logps/generated": -1494.1517333984375, |
| "logps/real": -452.7135314941406, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -405.7174377441406, |
| "rewards/margins": 383.12359619140625, |
| "rewards/real": -22.593822479248047, |
| "step": 1250 |
| }, |
| { |
| "epoch": 4.581818181818182, |
| "grad_norm": 1.4884899843309194e-06, |
| "learning_rate": 2.6262626262626263e-08, |
| "logits/generated": 3.5744926929473877, |
| "logits/real": 4.0339460372924805, |
| "logps/generated": -1389.479248046875, |
| "logps/real": -527.5264892578125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -397.1904296875, |
| "rewards/margins": 371.60284423828125, |
| "rewards/real": -25.587596893310547, |
| "step": 1260 |
| }, |
| { |
| "epoch": 4.618181818181818, |
| "grad_norm": 1.6503647943638648e-14, |
| "learning_rate": 2.558922558922559e-08, |
| "logits/generated": 4.005766868591309, |
| "logits/real": 4.283044338226318, |
| "logps/generated": -2155.259033203125, |
| "logps/real": -538.6444091796875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -431.6084899902344, |
| "rewards/margins": 398.53594970703125, |
| "rewards/real": -33.07251739501953, |
| "step": 1270 |
| }, |
| { |
| "epoch": 4.654545454545454, |
| "grad_norm": 7.146810977608247e-31, |
| "learning_rate": 2.4915824915824916e-08, |
| "logits/generated": 3.9285404682159424, |
| "logits/real": 3.7168357372283936, |
| "logps/generated": -1552.205078125, |
| "logps/real": -487.2206115722656, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -389.40643310546875, |
| "rewards/margins": 362.6419677734375, |
| "rewards/real": -26.764474868774414, |
| "step": 1280 |
| }, |
| { |
| "epoch": 4.690909090909091, |
| "grad_norm": 0.0, |
| "learning_rate": 2.4242424242424243e-08, |
| "logits/generated": 3.776773452758789, |
| "logits/real": 4.315047264099121, |
| "logps/generated": -1338.3245849609375, |
| "logps/real": -558.236083984375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -364.6140441894531, |
| "rewards/margins": 339.8752136230469, |
| "rewards/real": -24.738853454589844, |
| "step": 1290 |
| }, |
| { |
| "epoch": 4.7272727272727275, |
| "grad_norm": 0.0, |
| "learning_rate": 2.356902356902357e-08, |
| "logits/generated": 3.5295321941375732, |
| "logits/real": 4.021418571472168, |
| "logps/generated": -1478.497314453125, |
| "logps/real": -499.4085388183594, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -414.16436767578125, |
| "rewards/margins": 385.1223449707031, |
| "rewards/real": -29.041961669921875, |
| "step": 1300 |
| }, |
| { |
| "epoch": 4.763636363636364, |
| "grad_norm": 1.6611749401721815e-30, |
| "learning_rate": 2.2895622895622897e-08, |
| "logits/generated": 3.9792373180389404, |
| "logits/real": 4.20035457611084, |
| "logps/generated": -1674.936767578125, |
| "logps/real": -557.9586791992188, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -390.08990478515625, |
| "rewards/margins": 362.26739501953125, |
| "rewards/real": -27.822498321533203, |
| "step": 1310 |
| }, |
| { |
| "epoch": 4.8, |
| "grad_norm": 1.3498064441084664e-28, |
| "learning_rate": 2.222222222222222e-08, |
| "logits/generated": 3.1779017448425293, |
| "logits/real": 3.6151771545410156, |
| "logps/generated": -1536.013916015625, |
| "logps/real": -532.0733642578125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -406.447265625, |
| "rewards/margins": 378.7431640625, |
| "rewards/real": -27.704113006591797, |
| "step": 1320 |
| }, |
| { |
| "epoch": 4.836363636363636, |
| "grad_norm": 1.4446934503565444e-19, |
| "learning_rate": 2.1548821548821547e-08, |
| "logits/generated": 3.237544536590576, |
| "logits/real": 3.6873066425323486, |
| "logps/generated": -1580.73388671875, |
| "logps/real": -503.3658752441406, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -422.28558349609375, |
| "rewards/margins": 395.5210876464844, |
| "rewards/real": -26.764461517333984, |
| "step": 1330 |
| }, |
| { |
| "epoch": 4.872727272727273, |
| "grad_norm": 1.338898110592278e-13, |
| "learning_rate": 2.0875420875420874e-08, |
| "logits/generated": 3.466773271560669, |
| "logits/real": 4.2854814529418945, |
| "logps/generated": -1519.102294921875, |
| "logps/real": -484.23529052734375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -394.77484130859375, |
| "rewards/margins": 370.26416015625, |
| "rewards/real": -24.510705947875977, |
| "step": 1340 |
| }, |
| { |
| "epoch": 4.909090909090909, |
| "grad_norm": 9.330317502415284e-18, |
| "learning_rate": 2.02020202020202e-08, |
| "logits/generated": 3.3465042114257812, |
| "logits/real": 4.172385215759277, |
| "logps/generated": -1484.463134765625, |
| "logps/real": -564.1114501953125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -399.4004211425781, |
| "rewards/margins": 375.4627380371094, |
| "rewards/real": -23.937633514404297, |
| "step": 1350 |
| }, |
| { |
| "epoch": 4.945454545454545, |
| "grad_norm": 1.0844770956022308e-29, |
| "learning_rate": 1.9528619528619528e-08, |
| "logits/generated": 3.637136936187744, |
| "logits/real": 3.7470059394836426, |
| "logps/generated": -1451.9078369140625, |
| "logps/real": -555.2150268554688, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -409.23699951171875, |
| "rewards/margins": 382.7684020996094, |
| "rewards/real": -26.468582153320312, |
| "step": 1360 |
| }, |
| { |
| "epoch": 4.9818181818181815, |
| "grad_norm": 1.8117589004328856e-21, |
| "learning_rate": 1.8855218855218855e-08, |
| "logits/generated": 3.419265031814575, |
| "logits/real": 4.801307678222656, |
| "logps/generated": -1575.279541015625, |
| "logps/real": -505.57366943359375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -392.1907653808594, |
| "rewards/margins": 370.88433837890625, |
| "rewards/real": -21.30645751953125, |
| "step": 1370 |
| }, |
| { |
| "epoch": 5.0181818181818185, |
| "grad_norm": 1.1855551970740116e-24, |
| "learning_rate": 1.818181818181818e-08, |
| "logits/generated": 3.845301389694214, |
| "logits/real": 4.155182361602783, |
| "logps/generated": -1829.9830322265625, |
| "logps/real": -475.1502990722656, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -390.05047607421875, |
| "rewards/margins": 369.96630859375, |
| "rewards/real": -20.084190368652344, |
| "step": 1380 |
| }, |
| { |
| "epoch": 5.054545454545455, |
| "grad_norm": 0.0, |
| "learning_rate": 1.750841750841751e-08, |
| "logits/generated": 3.4445242881774902, |
| "logits/real": 3.744450092315674, |
| "logps/generated": -1426.3824462890625, |
| "logps/real": -522.4410400390625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -403.8083190917969, |
| "rewards/margins": 379.8366394042969, |
| "rewards/real": -23.9716854095459, |
| "step": 1390 |
| }, |
| { |
| "epoch": 5.090909090909091, |
| "grad_norm": 0.0, |
| "learning_rate": 1.6835016835016835e-08, |
| "logits/generated": 3.5476207733154297, |
| "logits/real": 4.142487049102783, |
| "logps/generated": -1349.896484375, |
| "logps/real": -569.4417724609375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -349.24212646484375, |
| "rewards/margins": 328.8203430175781, |
| "rewards/real": -20.42179298400879, |
| "step": 1400 |
| }, |
| { |
| "epoch": 5.127272727272727, |
| "grad_norm": 0.0, |
| "learning_rate": 1.6161616161616162e-08, |
| "logits/generated": 3.408921003341675, |
| "logits/real": 3.993192195892334, |
| "logps/generated": -1857.1177978515625, |
| "logps/real": -529.3366088867188, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -444.29205322265625, |
| "rewards/margins": 420.785888671875, |
| "rewards/real": -23.506126403808594, |
| "step": 1410 |
| }, |
| { |
| "epoch": 5.163636363636364, |
| "grad_norm": 4.017349771154928e-20, |
| "learning_rate": 1.5488215488215486e-08, |
| "logits/generated": 3.642667770385742, |
| "logits/real": 3.8170886039733887, |
| "logps/generated": -1371.3741455078125, |
| "logps/real": -572.6683349609375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -362.79974365234375, |
| "rewards/margins": 334.07781982421875, |
| "rewards/real": -28.721933364868164, |
| "step": 1420 |
| }, |
| { |
| "epoch": 5.2, |
| "grad_norm": 1.3639315235721727e-16, |
| "learning_rate": 1.4814814814814813e-08, |
| "logits/generated": 3.6792221069335938, |
| "logits/real": 4.338517189025879, |
| "logps/generated": -1340.431884765625, |
| "logps/real": -553.2203369140625, |
| "loss": 0.0052, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -373.75408935546875, |
| "rewards/margins": 357.2299499511719, |
| "rewards/real": -16.524112701416016, |
| "step": 1430 |
| }, |
| { |
| "epoch": 5.236363636363636, |
| "grad_norm": 3.081123632921718e-23, |
| "learning_rate": 1.4141414141414141e-08, |
| "logits/generated": 3.450949192047119, |
| "logits/real": 3.970660448074341, |
| "logps/generated": -1683.385986328125, |
| "logps/real": -535.0615234375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -463.681884765625, |
| "rewards/margins": 442.11138916015625, |
| "rewards/real": -21.570537567138672, |
| "step": 1440 |
| }, |
| { |
| "epoch": 5.2727272727272725, |
| "grad_norm": 3.0728626723669876e-21, |
| "learning_rate": 1.3468013468013468e-08, |
| "logits/generated": 3.5929648876190186, |
| "logits/real": 4.298793792724609, |
| "logps/generated": -1646.7025146484375, |
| "logps/real": -518.2625122070312, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -394.73199462890625, |
| "rewards/margins": 378.03753662109375, |
| "rewards/real": -16.694438934326172, |
| "step": 1450 |
| }, |
| { |
| "epoch": 5.309090909090909, |
| "grad_norm": 4.0070865257592495e-32, |
| "learning_rate": 1.2794612794612795e-08, |
| "logits/generated": 3.2639718055725098, |
| "logits/real": 3.8822433948516846, |
| "logps/generated": -1574.0999755859375, |
| "logps/real": -550.0856323242188, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -377.2677917480469, |
| "rewards/margins": 360.36578369140625, |
| "rewards/real": -16.902013778686523, |
| "step": 1460 |
| }, |
| { |
| "epoch": 5.345454545454546, |
| "grad_norm": 1.9271475273942652e-15, |
| "learning_rate": 1.2121212121212122e-08, |
| "logits/generated": 3.7863450050354004, |
| "logits/real": 4.053950786590576, |
| "logps/generated": -1627.314208984375, |
| "logps/real": -525.3707275390625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -400.48150634765625, |
| "rewards/margins": 378.9859313964844, |
| "rewards/real": -21.495540618896484, |
| "step": 1470 |
| }, |
| { |
| "epoch": 5.381818181818182, |
| "grad_norm": 0.0, |
| "learning_rate": 1.1447811447811448e-08, |
| "logits/generated": 3.581354856491089, |
| "logits/real": 3.8952338695526123, |
| "logps/generated": -1557.240966796875, |
| "logps/real": -485.9366149902344, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -376.5646057128906, |
| "rewards/margins": 362.3238220214844, |
| "rewards/real": -14.240735054016113, |
| "step": 1480 |
| }, |
| { |
| "epoch": 5.418181818181818, |
| "grad_norm": 0.0, |
| "learning_rate": 1.0774410774410774e-08, |
| "logits/generated": 3.418860673904419, |
| "logits/real": 4.168183326721191, |
| "logps/generated": -1425.8209228515625, |
| "logps/real": -488.4073181152344, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -397.9587097167969, |
| "rewards/margins": 378.22540283203125, |
| "rewards/real": -19.733327865600586, |
| "step": 1490 |
| }, |
| { |
| "epoch": 5.454545454545454, |
| "grad_norm": 0.0, |
| "learning_rate": 1.01010101010101e-08, |
| "logits/generated": 3.3258609771728516, |
| "logits/real": 3.9882960319519043, |
| "logps/generated": -1420.126708984375, |
| "logps/real": -562.3682250976562, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -379.91461181640625, |
| "rewards/margins": 360.4001770019531, |
| "rewards/real": -19.51443099975586, |
| "step": 1500 |
| }, |
| { |
| "epoch": 5.490909090909091, |
| "grad_norm": 1.2370404728759755e-12, |
| "learning_rate": 9.427609427609427e-09, |
| "logits/generated": 3.9953675270080566, |
| "logits/real": 4.288315773010254, |
| "logps/generated": -1769.1597900390625, |
| "logps/real": -541.0712890625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -395.64190673828125, |
| "rewards/margins": 376.20367431640625, |
| "rewards/real": -19.43826675415039, |
| "step": 1510 |
| }, |
| { |
| "epoch": 5.527272727272727, |
| "grad_norm": 3.606466171226265e-24, |
| "learning_rate": 8.754208754208754e-09, |
| "logits/generated": 3.4135119915008545, |
| "logits/real": 4.049647808074951, |
| "logps/generated": -1462.7747802734375, |
| "logps/real": -554.20849609375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -423.81182861328125, |
| "rewards/margins": 400.8638610839844, |
| "rewards/real": -22.94796371459961, |
| "step": 1520 |
| }, |
| { |
| "epoch": 5.5636363636363635, |
| "grad_norm": 6.1808472626225334e-27, |
| "learning_rate": 8.080808080808081e-09, |
| "logits/generated": 3.4834237098693848, |
| "logits/real": 3.9518351554870605, |
| "logps/generated": -1418.6181640625, |
| "logps/real": -531.7501220703125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -375.73388671875, |
| "rewards/margins": 351.1172790527344, |
| "rewards/real": -24.616636276245117, |
| "step": 1530 |
| }, |
| { |
| "epoch": 5.6, |
| "grad_norm": 7.484860551860185e-19, |
| "learning_rate": 7.407407407407406e-09, |
| "logits/generated": 3.5700645446777344, |
| "logits/real": 3.8483328819274902, |
| "logps/generated": -1562.0670166015625, |
| "logps/real": -544.864501953125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -423.4938049316406, |
| "rewards/margins": 397.80877685546875, |
| "rewards/real": -25.68499755859375, |
| "step": 1540 |
| }, |
| { |
| "epoch": 5.636363636363637, |
| "grad_norm": 2.060032570194185e-26, |
| "learning_rate": 6.734006734006734e-09, |
| "logits/generated": 3.778414249420166, |
| "logits/real": 4.111809253692627, |
| "logps/generated": -1598.574951171875, |
| "logps/real": -540.5823364257812, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -392.5323486328125, |
| "rewards/margins": 368.44403076171875, |
| "rewards/real": -24.08827018737793, |
| "step": 1550 |
| }, |
| { |
| "epoch": 5.672727272727273, |
| "grad_norm": 2.891660272317984e-18, |
| "learning_rate": 6.060606060606061e-09, |
| "logits/generated": 3.6908371448516846, |
| "logits/real": 3.9306640625, |
| "logps/generated": -1735.0189208984375, |
| "logps/real": -525.3858032226562, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -403.2166748046875, |
| "rewards/margins": 380.7942199707031, |
| "rewards/real": -22.42244529724121, |
| "step": 1560 |
| }, |
| { |
| "epoch": 5.709090909090909, |
| "grad_norm": 0.5773051194749282, |
| "learning_rate": 5.387205387205387e-09, |
| "logits/generated": 3.379422426223755, |
| "logits/real": 3.869427442550659, |
| "logps/generated": -1555.796142578125, |
| "logps/real": -574.700927734375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -379.06866455078125, |
| "rewards/margins": 355.59228515625, |
| "rewards/real": -23.47635269165039, |
| "step": 1570 |
| }, |
| { |
| "epoch": 5.745454545454545, |
| "grad_norm": 3.9363255052487036e-30, |
| "learning_rate": 4.713804713804714e-09, |
| "logits/generated": 3.4176697731018066, |
| "logits/real": 3.742943525314331, |
| "logps/generated": -1533.4945068359375, |
| "logps/real": -517.671875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -388.6910705566406, |
| "rewards/margins": 369.216796875, |
| "rewards/real": -19.474323272705078, |
| "step": 1580 |
| }, |
| { |
| "epoch": 5.781818181818182, |
| "grad_norm": 1.674243115474082e-06, |
| "learning_rate": 4.0404040404040405e-09, |
| "logits/generated": 3.578691005706787, |
| "logits/real": 4.240370750427246, |
| "logps/generated": -1983.2786865234375, |
| "logps/real": -543.065185546875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -423.01593017578125, |
| "rewards/margins": 398.25390625, |
| "rewards/real": -24.76194953918457, |
| "step": 1590 |
| }, |
| { |
| "epoch": 5.818181818181818, |
| "grad_norm": 1.5404600960548044e-09, |
| "learning_rate": 3.367003367003367e-09, |
| "logits/generated": 3.7445895671844482, |
| "logits/real": 3.5675225257873535, |
| "logps/generated": -2012.541259765625, |
| "logps/real": -534.4871215820312, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -425.9609375, |
| "rewards/margins": 397.54046630859375, |
| "rewards/real": -28.42045021057129, |
| "step": 1600 |
| }, |
| { |
| "epoch": 5.8545454545454545, |
| "grad_norm": 2.3164162133427578e-17, |
| "learning_rate": 2.6936026936026934e-09, |
| "logits/generated": 3.759115219116211, |
| "logits/real": 3.735142946243286, |
| "logps/generated": -1591.992919921875, |
| "logps/real": -514.6450805664062, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -417.63653564453125, |
| "rewards/margins": 393.0400390625, |
| "rewards/real": -24.596487045288086, |
| "step": 1610 |
| }, |
| { |
| "epoch": 5.890909090909091, |
| "grad_norm": 3.533525435981294e-29, |
| "learning_rate": 2.0202020202020203e-09, |
| "logits/generated": 3.510359287261963, |
| "logits/real": 3.4505932331085205, |
| "logps/generated": -1413.510986328125, |
| "logps/real": -598.2730712890625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -406.50311279296875, |
| "rewards/margins": 370.1054992675781, |
| "rewards/real": -36.397682189941406, |
| "step": 1620 |
| }, |
| { |
| "epoch": 5.927272727272728, |
| "grad_norm": 1.793526514420287e-22, |
| "learning_rate": 1.3468013468013467e-09, |
| "logits/generated": 3.410717010498047, |
| "logits/real": 3.9544615745544434, |
| "logps/generated": -1549.203369140625, |
| "logps/real": -573.8199462890625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -372.239501953125, |
| "rewards/margins": 344.86798095703125, |
| "rewards/real": -27.37152099609375, |
| "step": 1630 |
| }, |
| { |
| "epoch": 5.963636363636364, |
| "grad_norm": 9.588588952918977e-23, |
| "learning_rate": 6.734006734006734e-10, |
| "logits/generated": 3.7320778369903564, |
| "logits/real": 4.073027610778809, |
| "logps/generated": -1287.1448974609375, |
| "logps/real": -473.1319885253906, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -355.93621826171875, |
| "rewards/margins": 333.669189453125, |
| "rewards/real": -22.26704978942871, |
| "step": 1640 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 3.4581900765324e-15, |
| "learning_rate": 0.0, |
| "logits/generated": 3.713982105255127, |
| "logits/real": 3.768115997314453, |
| "logps/generated": -1897.5390625, |
| "logps/real": -579.6484375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -457.3140563964844, |
| "rewards/margins": 419.3667907714844, |
| "rewards/real": -37.947288513183594, |
| "step": 1650 |
| }, |
| { |
| "epoch": 6.0, |
| "step": 1650, |
| "total_flos": 0.0, |
| "train_loss": 0.16329280181607425, |
| "train_runtime": 21629.7145, |
| "train_samples_per_second": 4.882, |
| "train_steps_per_second": 0.076 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 1650, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 6, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|