| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 6.0, |
| "eval_steps": 100, |
| "global_step": 1650, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0036363636363636364, |
| "grad_norm": 1914.0669841335673, |
| "learning_rate": 3.03030303030303e-09, |
| "logits/generated": 2.8247313499450684, |
| "logits/real": 2.0536062717437744, |
| "logps/generated": -351.4994201660156, |
| "logps/real": -477.549072265625, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.0, |
| "rewards/generated": 0.0, |
| "rewards/margins": 0.0, |
| "rewards/real": 0.0, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.03636363636363636, |
| "grad_norm": 957.205299846335, |
| "learning_rate": 3.0303030303030305e-08, |
| "logits/generated": 2.8234548568725586, |
| "logits/real": 2.13620662689209, |
| "logps/generated": -728.49853515625, |
| "logps/real": -385.6576843261719, |
| "loss": 0.6834, |
| "rewards/accuracies": 0.4861111044883728, |
| "rewards/generated": -0.06369408965110779, |
| "rewards/margins": 0.06537667661905289, |
| "rewards/real": 0.001682590926066041, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.07272727272727272, |
| "grad_norm": 186.10374621715866, |
| "learning_rate": 6.060606060606061e-08, |
| "logits/generated": 3.0520665645599365, |
| "logits/real": 2.3358449935913086, |
| "logps/generated": -814.5985717773438, |
| "logps/real": -449.64654541015625, |
| "loss": 0.5796, |
| "rewards/accuracies": 0.75, |
| "rewards/generated": -0.590421199798584, |
| "rewards/margins": 0.5598743557929993, |
| "rewards/real": -0.030546903610229492, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.10909090909090909, |
| "grad_norm": 89.49394324770884, |
| "learning_rate": 9.09090909090909e-08, |
| "logits/generated": 3.1544127464294434, |
| "logits/real": 2.341243028640747, |
| "logps/generated": -614.533203125, |
| "logps/real": -494.87823486328125, |
| "loss": 0.4, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/generated": -1.4715439081192017, |
| "rewards/margins": 1.2540919780731201, |
| "rewards/real": -0.21745213866233826, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.14545454545454545, |
| "grad_norm": 56.92903236170165, |
| "learning_rate": 1.2121212121212122e-07, |
| "logits/generated": 3.0897538661956787, |
| "logits/real": 2.354968309402466, |
| "logps/generated": -777.1187744140625, |
| "logps/real": -518.1546020507812, |
| "loss": 0.2207, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -4.063536167144775, |
| "rewards/margins": 3.547344923019409, |
| "rewards/real": -0.5161911845207214, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.18181818181818182, |
| "grad_norm": 22.146909469971494, |
| "learning_rate": 1.5151515151515152e-07, |
| "logits/generated": 3.1673903465270996, |
| "logits/real": 2.656967878341675, |
| "logps/generated": -958.2801513671875, |
| "logps/real": -417.46026611328125, |
| "loss": 0.1115, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -9.105631828308105, |
| "rewards/margins": 8.369694709777832, |
| "rewards/real": -0.7359374165534973, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.21818181818181817, |
| "grad_norm": 6.710513441399776, |
| "learning_rate": 1.818181818181818e-07, |
| "logits/generated": 3.143266439437866, |
| "logits/real": 2.533066749572754, |
| "logps/generated": -990.8375244140625, |
| "logps/real": -449.7086486816406, |
| "loss": 0.0388, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -12.80955696105957, |
| "rewards/margins": 11.602985382080078, |
| "rewards/real": -1.2065709829330444, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.2545454545454545, |
| "grad_norm": 10.81059007018425, |
| "learning_rate": 2.121212121212121e-07, |
| "logits/generated": 3.335444688796997, |
| "logits/real": 2.734952449798584, |
| "logps/generated": -1207.6993408203125, |
| "logps/real": -473.37615966796875, |
| "loss": 0.0235, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -17.868305206298828, |
| "rewards/margins": 16.526086807250977, |
| "rewards/real": -1.3422199487686157, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.2909090909090909, |
| "grad_norm": 5.322710291928728, |
| "learning_rate": 2.4242424242424244e-07, |
| "logits/generated": 3.3226451873779297, |
| "logits/real": 2.602600336074829, |
| "logps/generated": -1233.799560546875, |
| "logps/real": -419.0763244628906, |
| "loss": 0.0111, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -21.38240623474121, |
| "rewards/margins": 19.921634674072266, |
| "rewards/real": -1.4607731103897095, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.32727272727272727, |
| "grad_norm": 2.803596256476958, |
| "learning_rate": 2.727272727272727e-07, |
| "logits/generated": 2.8833084106445312, |
| "logits/real": 2.695115804672241, |
| "logps/generated": -637.830078125, |
| "logps/real": -467.0171813964844, |
| "loss": 0.005, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -18.157733917236328, |
| "rewards/margins": 15.862768173217773, |
| "rewards/real": -2.2949652671813965, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.36363636363636365, |
| "grad_norm": 1.5269803313685761, |
| "learning_rate": 3.0303030303030305e-07, |
| "logits/generated": 3.1768739223480225, |
| "logits/real": 2.9757938385009766, |
| "logps/generated": -777.4149169921875, |
| "logps/real": -472.0936584472656, |
| "loss": 0.0028, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -21.743295669555664, |
| "rewards/margins": 19.30459976196289, |
| "rewards/real": -2.438692808151245, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 0.8658527456401439, |
| "learning_rate": 3.333333333333333e-07, |
| "logits/generated": 3.320415496826172, |
| "logits/real": 2.9949562549591064, |
| "logps/generated": -881.67431640625, |
| "logps/real": -489.31378173828125, |
| "loss": 0.0013, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -26.766021728515625, |
| "rewards/margins": 24.18350601196289, |
| "rewards/real": -2.5825135707855225, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.43636363636363634, |
| "grad_norm": 0.7431319758468002, |
| "learning_rate": 3.636363636363636e-07, |
| "logits/generated": 3.732408046722412, |
| "logits/real": 3.3026363849639893, |
| "logps/generated": -1488.354736328125, |
| "logps/real": -397.7535400390625, |
| "loss": 0.0007, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -43.310401916503906, |
| "rewards/margins": 41.15972137451172, |
| "rewards/real": -2.1506762504577637, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.4727272727272727, |
| "grad_norm": 0.3539310109847719, |
| "learning_rate": 3.939393939393939e-07, |
| "logits/generated": 3.5819716453552246, |
| "logits/real": 3.207610607147217, |
| "logps/generated": -1109.115478515625, |
| "logps/real": -468.93304443359375, |
| "loss": 0.0006, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -35.37150955200195, |
| "rewards/margins": 33.080970764160156, |
| "rewards/real": -2.290536403656006, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.509090909090909, |
| "grad_norm": 0.36998938203803255, |
| "learning_rate": 4.242424242424242e-07, |
| "logits/generated": 3.399541139602661, |
| "logits/real": 3.217557430267334, |
| "logps/generated": -786.1734619140625, |
| "logps/real": -448.0440368652344, |
| "loss": 0.0006, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -24.668045043945312, |
| "rewards/margins": 22.3218994140625, |
| "rewards/real": -2.3461461067199707, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.5454545454545454, |
| "grad_norm": 0.031003857964457825, |
| "learning_rate": 4.545454545454545e-07, |
| "logits/generated": 3.6583428382873535, |
| "logits/real": 3.1666476726531982, |
| "logps/generated": -1667.064453125, |
| "logps/real": -445.8917541503906, |
| "loss": 0.0008, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -56.291542053222656, |
| "rewards/margins": 53.78043746948242, |
| "rewards/real": -2.5111021995544434, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.5818181818181818, |
| "grad_norm": 0.10216382354780827, |
| "learning_rate": 4.848484848484849e-07, |
| "logits/generated": 3.6542396545410156, |
| "logits/real": 3.454103469848633, |
| "logps/generated": -1330.52587890625, |
| "logps/real": -446.6952209472656, |
| "loss": 0.0004, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -44.972652435302734, |
| "rewards/margins": 42.243167877197266, |
| "rewards/real": -2.7294909954071045, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.6181818181818182, |
| "grad_norm": 0.5393067289689158, |
| "learning_rate": 4.983164983164983e-07, |
| "logits/generated": 3.8200392723083496, |
| "logits/real": 3.27642560005188, |
| "logps/generated": -1152.6976318359375, |
| "logps/real": -457.31158447265625, |
| "loss": 0.0047, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -40.51918029785156, |
| "rewards/margins": 37.82313537597656, |
| "rewards/real": -2.696047306060791, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.6545454545454545, |
| "grad_norm": 0.1428440835513627, |
| "learning_rate": 4.949494949494949e-07, |
| "logits/generated": 3.628065586090088, |
| "logits/real": 3.3550992012023926, |
| "logps/generated": -922.5845947265625, |
| "logps/real": -401.19830322265625, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -32.044578552246094, |
| "rewards/margins": 29.7742977142334, |
| "rewards/real": -2.270280599594116, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.6909090909090909, |
| "grad_norm": 0.3679331054154446, |
| "learning_rate": 4.915824915824915e-07, |
| "logits/generated": 3.8065967559814453, |
| "logits/real": 3.234556198120117, |
| "logps/generated": -1263.3641357421875, |
| "logps/real": -417.69512939453125, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -43.110328674316406, |
| "rewards/margins": 40.8560676574707, |
| "rewards/real": -2.254263401031494, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.7272727272727273, |
| "grad_norm": 0.3074657387957313, |
| "learning_rate": 4.882154882154882e-07, |
| "logits/generated": 3.6317858695983887, |
| "logits/real": 3.2310867309570312, |
| "logps/generated": -977.9903564453125, |
| "logps/real": -551.634033203125, |
| "loss": 0.001, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -34.79268264770508, |
| "rewards/margins": 31.959827423095703, |
| "rewards/real": -2.832855701446533, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.7636363636363637, |
| "grad_norm": 0.059293841086296165, |
| "learning_rate": 4.848484848484849e-07, |
| "logits/generated": 3.605592727661133, |
| "logits/real": 3.393489122390747, |
| "logps/generated": -1668.871337890625, |
| "logps/real": -449.1474609375, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -60.01628494262695, |
| "rewards/margins": 57.65550994873047, |
| "rewards/real": -2.360779285430908, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.08236734575362854, |
| "learning_rate": 4.814814814814814e-07, |
| "logits/generated": 3.7269718647003174, |
| "logits/real": 3.6311733722686768, |
| "logps/generated": -1147.8819580078125, |
| "logps/real": -363.1935729980469, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -41.762962341308594, |
| "rewards/margins": 39.59125518798828, |
| "rewards/real": -2.1717114448547363, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.8363636363636363, |
| "grad_norm": 0.06506668417151372, |
| "learning_rate": 4.781144781144781e-07, |
| "logits/generated": 3.6163909435272217, |
| "logits/real": 3.610844373703003, |
| "logps/generated": -1436.1513671875, |
| "logps/real": -456.0074157714844, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -52.98537063598633, |
| "rewards/margins": 49.76090621948242, |
| "rewards/real": -3.224468231201172, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.8727272727272727, |
| "grad_norm": 0.03507021137362982, |
| "learning_rate": 4.7474747474747474e-07, |
| "logits/generated": 3.553302764892578, |
| "logits/real": 3.47314715385437, |
| "logps/generated": -1206.565185546875, |
| "logps/real": -426.70867919921875, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -46.37542724609375, |
| "rewards/margins": 43.096046447753906, |
| "rewards/real": -3.279383420944214, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.9090909090909091, |
| "grad_norm": 3.960989766677836, |
| "learning_rate": 4.7138047138047136e-07, |
| "logits/generated": 3.4441192150115967, |
| "logits/real": 3.2921531200408936, |
| "logps/generated": -875.3850708007812, |
| "logps/real": -449.03936767578125, |
| "loss": 0.0005, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -33.70497131347656, |
| "rewards/margins": 31.499170303344727, |
| "rewards/real": -2.2058050632476807, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.9454545454545454, |
| "grad_norm": 0.1118397976534182, |
| "learning_rate": 4.68013468013468e-07, |
| "logits/generated": 3.552502393722534, |
| "logits/real": 3.365135669708252, |
| "logps/generated": -925.8385009765625, |
| "logps/real": -420.988525390625, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -35.2255744934082, |
| "rewards/margins": 33.324729919433594, |
| "rewards/real": -1.9008433818817139, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.9818181818181818, |
| "grad_norm": 0.0031177562577058694, |
| "learning_rate": 4.646464646464646e-07, |
| "logits/generated": 4.00087833404541, |
| "logits/real": 3.5007052421569824, |
| "logps/generated": -1584.2337646484375, |
| "logps/real": -434.68572998046875, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -60.17878341674805, |
| "rewards/margins": 57.7464714050293, |
| "rewards/real": -2.4323067665100098, |
| "step": 270 |
| }, |
| { |
| "epoch": 1.018181818181818, |
| "grad_norm": 0.05555093819949706, |
| "learning_rate": 4.612794612794613e-07, |
| "logits/generated": 3.8233509063720703, |
| "logits/real": 3.6865200996398926, |
| "logps/generated": -1354.489501953125, |
| "logps/real": -416.71820068359375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -50.17599868774414, |
| "rewards/margins": 48.03873825073242, |
| "rewards/real": -2.1372580528259277, |
| "step": 280 |
| }, |
| { |
| "epoch": 1.0545454545454545, |
| "grad_norm": 0.017890113281613322, |
| "learning_rate": 4.579124579124579e-07, |
| "logits/generated": 3.7898383140563965, |
| "logits/real": 3.3585867881774902, |
| "logps/generated": -840.5850830078125, |
| "logps/real": -517.4666748046875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -31.330394744873047, |
| "rewards/margins": 28.912433624267578, |
| "rewards/real": -2.4179651737213135, |
| "step": 290 |
| }, |
| { |
| "epoch": 1.0909090909090908, |
| "grad_norm": 0.02423097085406043, |
| "learning_rate": 4.545454545454545e-07, |
| "logits/generated": 3.695669651031494, |
| "logits/real": 3.6764883995056152, |
| "logps/generated": -1163.597412109375, |
| "logps/real": -468.89691162109375, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -45.13127899169922, |
| "rewards/margins": 42.545928955078125, |
| "rewards/real": -2.5853497982025146, |
| "step": 300 |
| }, |
| { |
| "epoch": 1.1272727272727272, |
| "grad_norm": 0.0049484467104669565, |
| "learning_rate": 4.5117845117845114e-07, |
| "logits/generated": 3.732903242111206, |
| "logits/real": 3.7502872943878174, |
| "logps/generated": -1116.242919921875, |
| "logps/real": -363.4823303222656, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -42.95992660522461, |
| "rewards/margins": 40.67012023925781, |
| "rewards/real": -2.289808511734009, |
| "step": 310 |
| }, |
| { |
| "epoch": 1.1636363636363636, |
| "grad_norm": 0.009030461223581908, |
| "learning_rate": 4.478114478114478e-07, |
| "logits/generated": 3.9101524353027344, |
| "logits/real": 3.1595616340637207, |
| "logps/generated": -1227.79638671875, |
| "logps/real": -436.49359130859375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -46.5848503112793, |
| "rewards/margins": 44.355323791503906, |
| "rewards/real": -2.2295217514038086, |
| "step": 320 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 0.003881099666210952, |
| "learning_rate": 4.444444444444444e-07, |
| "logits/generated": 3.598174571990967, |
| "logits/real": 3.6226792335510254, |
| "logps/generated": -1062.3441162109375, |
| "logps/real": -417.962158203125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -41.58740234375, |
| "rewards/margins": 39.050086975097656, |
| "rewards/real": -2.53731369972229, |
| "step": 330 |
| }, |
| { |
| "epoch": 1.2363636363636363, |
| "grad_norm": 0.11244938516399426, |
| "learning_rate": 4.4107744107744106e-07, |
| "logits/generated": 3.567678451538086, |
| "logits/real": 3.7249159812927246, |
| "logps/generated": -961.2346801757812, |
| "logps/real": -434.4712829589844, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -38.949554443359375, |
| "rewards/margins": 36.227928161621094, |
| "rewards/real": -2.721627950668335, |
| "step": 340 |
| }, |
| { |
| "epoch": 1.2727272727272727, |
| "grad_norm": 0.023911125609004182, |
| "learning_rate": 4.377104377104377e-07, |
| "logits/generated": 3.596735715866089, |
| "logits/real": 3.5958075523376465, |
| "logps/generated": -1215.8427734375, |
| "logps/real": -439.3638610839844, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -47.29825210571289, |
| "rewards/margins": 44.59672164916992, |
| "rewards/real": -2.701531171798706, |
| "step": 350 |
| }, |
| { |
| "epoch": 1.309090909090909, |
| "grad_norm": 0.004609330889680118, |
| "learning_rate": 4.3434343434343435e-07, |
| "logits/generated": 3.857773542404175, |
| "logits/real": 3.0869548320770264, |
| "logps/generated": -1652.0277099609375, |
| "logps/real": -462.31866455078125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -65.08594512939453, |
| "rewards/margins": 62.09027099609375, |
| "rewards/real": -2.995673656463623, |
| "step": 360 |
| }, |
| { |
| "epoch": 1.3454545454545455, |
| "grad_norm": 0.021695313180074092, |
| "learning_rate": 4.309764309764309e-07, |
| "logits/generated": 3.8632941246032715, |
| "logits/real": 3.208986759185791, |
| "logps/generated": -1602.880615234375, |
| "logps/real": -492.37872314453125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -63.631866455078125, |
| "rewards/margins": 60.887290954589844, |
| "rewards/real": -2.7445733547210693, |
| "step": 370 |
| }, |
| { |
| "epoch": 1.3818181818181818, |
| "grad_norm": 0.08009450934560616, |
| "learning_rate": 4.276094276094276e-07, |
| "logits/generated": 3.9127426147460938, |
| "logits/real": 3.5540339946746826, |
| "logps/generated": -1360.1510009765625, |
| "logps/real": -459.3062438964844, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -53.524253845214844, |
| "rewards/margins": 50.88030242919922, |
| "rewards/real": -2.6439599990844727, |
| "step": 380 |
| }, |
| { |
| "epoch": 1.4181818181818182, |
| "grad_norm": 0.015895453959011013, |
| "learning_rate": 4.242424242424242e-07, |
| "logits/generated": 3.659101963043213, |
| "logits/real": 3.691251277923584, |
| "logps/generated": -759.4293212890625, |
| "logps/real": -525.3631591796875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -31.192474365234375, |
| "rewards/margins": 28.309253692626953, |
| "rewards/real": -2.883220672607422, |
| "step": 390 |
| }, |
| { |
| "epoch": 1.4545454545454546, |
| "grad_norm": 0.002626726892908866, |
| "learning_rate": 4.208754208754209e-07, |
| "logits/generated": 3.7086901664733887, |
| "logits/real": 3.498619794845581, |
| "logps/generated": -1179.454833984375, |
| "logps/real": -460.157958984375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -48.267391204833984, |
| "rewards/margins": 45.48485565185547, |
| "rewards/real": -2.782532215118408, |
| "step": 400 |
| }, |
| { |
| "epoch": 1.490909090909091, |
| "grad_norm": 0.005161280214896955, |
| "learning_rate": 4.1750841750841746e-07, |
| "logits/generated": 3.730764389038086, |
| "logits/real": 3.284414291381836, |
| "logps/generated": -1389.023193359375, |
| "logps/real": -472.274658203125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -56.40633010864258, |
| "rewards/margins": 53.044944763183594, |
| "rewards/real": -3.3613810539245605, |
| "step": 410 |
| }, |
| { |
| "epoch": 1.5272727272727273, |
| "grad_norm": 0.01203825929109908, |
| "learning_rate": 4.1414141414141413e-07, |
| "logits/generated": 3.4772918224334717, |
| "logits/real": 3.7250218391418457, |
| "logps/generated": -1553.1036376953125, |
| "logps/real": -465.3892517089844, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -63.161415100097656, |
| "rewards/margins": 59.865386962890625, |
| "rewards/real": -3.29602313041687, |
| "step": 420 |
| }, |
| { |
| "epoch": 1.5636363636363635, |
| "grad_norm": 0.00954567611305199, |
| "learning_rate": 4.1077441077441075e-07, |
| "logits/generated": 4.080082893371582, |
| "logits/real": 3.5112476348876953, |
| "logps/generated": -1215.934326171875, |
| "logps/real": -410.541748046875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -47.794490814208984, |
| "rewards/margins": 45.22086715698242, |
| "rewards/real": -2.5736231803894043, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 0.0021520111594958385, |
| "learning_rate": 4.0740740740740737e-07, |
| "logits/generated": 3.6889781951904297, |
| "logits/real": 3.5634422302246094, |
| "logps/generated": -1326.2265625, |
| "logps/real": -418.804931640625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -53.98466110229492, |
| "rewards/margins": 51.10773849487305, |
| "rewards/real": -2.876925230026245, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.6363636363636362, |
| "grad_norm": 0.0063222673485985, |
| "learning_rate": 4.04040404040404e-07, |
| "logits/generated": 3.561480760574341, |
| "logits/real": 3.5089659690856934, |
| "logps/generated": -1199.6884765625, |
| "logps/real": -423.364990234375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -48.4073486328125, |
| "rewards/margins": 45.80794143676758, |
| "rewards/real": -2.599404811859131, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.6727272727272728, |
| "grad_norm": 0.000768791949879652, |
| "learning_rate": 4.0067340067340067e-07, |
| "logits/generated": 3.9211220741271973, |
| "logits/real": 3.7459423542022705, |
| "logps/generated": -1202.2457275390625, |
| "logps/real": -396.33935546875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -50.28257751464844, |
| "rewards/margins": 48.02632522583008, |
| "rewards/real": -2.256255865097046, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.709090909090909, |
| "grad_norm": 0.01600856274211648, |
| "learning_rate": 3.973063973063973e-07, |
| "logits/generated": 3.9792861938476562, |
| "logits/real": 3.7603824138641357, |
| "logps/generated": -1093.778564453125, |
| "logps/real": -392.31646728515625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -43.18585205078125, |
| "rewards/margins": 40.816978454589844, |
| "rewards/real": -2.368870973587036, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.7454545454545456, |
| "grad_norm": 0.006151439686373968, |
| "learning_rate": 3.939393939393939e-07, |
| "logits/generated": 4.160303115844727, |
| "logits/real": 3.79850435256958, |
| "logps/generated": -1032.3426513671875, |
| "logps/real": -418.443359375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -39.8023681640625, |
| "rewards/margins": 37.66193389892578, |
| "rewards/real": -2.1404361724853516, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.7818181818181817, |
| "grad_norm": 0.0011853963068352326, |
| "learning_rate": 3.9057239057239053e-07, |
| "logits/generated": 3.737189531326294, |
| "logits/real": 3.491539478302002, |
| "logps/generated": -1278.316650390625, |
| "logps/real": -399.8204040527344, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -50.095821380615234, |
| "rewards/margins": 47.98601531982422, |
| "rewards/real": -2.1098055839538574, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.8181818181818183, |
| "grad_norm": 0.005555711556241692, |
| "learning_rate": 3.872053872053872e-07, |
| "logits/generated": 3.7458102703094482, |
| "logits/real": 3.904841184616089, |
| "logps/generated": -1269.622802734375, |
| "logps/real": -502.83538818359375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -51.42805862426758, |
| "rewards/margins": 49.014373779296875, |
| "rewards/real": -2.4136805534362793, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.8545454545454545, |
| "grad_norm": 0.02264974580958711, |
| "learning_rate": 3.8383838383838377e-07, |
| "logits/generated": 4.089097023010254, |
| "logits/real": 4.16144323348999, |
| "logps/generated": -1649.2330322265625, |
| "logps/real": -356.6659851074219, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -65.34940338134766, |
| "rewards/margins": 63.115943908691406, |
| "rewards/real": -2.2334671020507812, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.8909090909090909, |
| "grad_norm": 0.0030795735098584744, |
| "learning_rate": 3.8047138047138045e-07, |
| "logits/generated": 3.60687255859375, |
| "logits/real": 3.62739634513855, |
| "logps/generated": -1154.067626953125, |
| "logps/real": -519.19677734375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -45.407779693603516, |
| "rewards/margins": 42.44924545288086, |
| "rewards/real": -2.958531379699707, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.9272727272727272, |
| "grad_norm": 0.00743718788451984, |
| "learning_rate": 3.7710437710437707e-07, |
| "logits/generated": 4.003333568572998, |
| "logits/real": 3.6320443153381348, |
| "logps/generated": -1574.7633056640625, |
| "logps/real": -430.81146240234375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -63.63165283203125, |
| "rewards/margins": 60.943687438964844, |
| "rewards/real": -2.6879489421844482, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.9636363636363636, |
| "grad_norm": 0.003581692627886563, |
| "learning_rate": 3.7373737373737374e-07, |
| "logits/generated": 4.060280799865723, |
| "logits/real": 3.6592605113983154, |
| "logps/generated": -1586.013916015625, |
| "logps/real": -422.39892578125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -64.56217956542969, |
| "rewards/margins": 61.98358917236328, |
| "rewards/real": -2.5785882472991943, |
| "step": 540 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.004087463581422144, |
| "learning_rate": 3.703703703703703e-07, |
| "logits/generated": 3.9546761512756348, |
| "logits/real": 3.698378801345825, |
| "logps/generated": -1893.251220703125, |
| "logps/real": -402.0331726074219, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -77.40522003173828, |
| "rewards/margins": 74.73800659179688, |
| "rewards/real": -2.6672160625457764, |
| "step": 550 |
| }, |
| { |
| "epoch": 2.036363636363636, |
| "grad_norm": 0.0026569888763763455, |
| "learning_rate": 3.67003367003367e-07, |
| "logits/generated": 3.850944995880127, |
| "logits/real": 3.5789389610290527, |
| "logps/generated": -1203.314453125, |
| "logps/real": -477.9483337402344, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -51.28553009033203, |
| "rewards/margins": 47.97153854370117, |
| "rewards/real": -3.3139939308166504, |
| "step": 560 |
| }, |
| { |
| "epoch": 2.0727272727272728, |
| "grad_norm": 0.002300572924014686, |
| "learning_rate": 3.636363636363636e-07, |
| "logits/generated": 3.9447109699249268, |
| "logits/real": 3.6901111602783203, |
| "logps/generated": -1685.8695068359375, |
| "logps/real": -463.8533630371094, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -68.90098571777344, |
| "rewards/margins": 65.87489318847656, |
| "rewards/real": -3.0261025428771973, |
| "step": 570 |
| }, |
| { |
| "epoch": 2.109090909090909, |
| "grad_norm": 0.003583503892977523, |
| "learning_rate": 3.602693602693603e-07, |
| "logits/generated": 4.006188869476318, |
| "logits/real": 4.006190299987793, |
| "logps/generated": -1552.8453369140625, |
| "logps/real": -402.8440856933594, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -64.53579711914062, |
| "rewards/margins": 61.512725830078125, |
| "rewards/real": -3.0230753421783447, |
| "step": 580 |
| }, |
| { |
| "epoch": 2.1454545454545455, |
| "grad_norm": 0.007705162380818939, |
| "learning_rate": 3.5690235690235685e-07, |
| "logits/generated": 4.073629379272461, |
| "logits/real": 3.875983476638794, |
| "logps/generated": -1343.013427734375, |
| "logps/real": -382.87689208984375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -55.713523864746094, |
| "rewards/margins": 52.94609832763672, |
| "rewards/real": -2.7674267292022705, |
| "step": 590 |
| }, |
| { |
| "epoch": 2.1818181818181817, |
| "grad_norm": 0.013172669628685494, |
| "learning_rate": 3.535353535353535e-07, |
| "logits/generated": 4.253252983093262, |
| "logits/real": 3.769242525100708, |
| "logps/generated": -1818.175048828125, |
| "logps/real": -398.22308349609375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -75.47774505615234, |
| "rewards/margins": 72.8851318359375, |
| "rewards/real": -2.5926156044006348, |
| "step": 600 |
| }, |
| { |
| "epoch": 2.2181818181818183, |
| "grad_norm": 0.007024294960501395, |
| "learning_rate": 3.5016835016835014e-07, |
| "logits/generated": 3.699592113494873, |
| "logits/real": 3.579563856124878, |
| "logps/generated": -889.1650390625, |
| "logps/real": -469.04815673828125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -37.172508239746094, |
| "rewards/margins": 33.740169525146484, |
| "rewards/real": -3.432338237762451, |
| "step": 610 |
| }, |
| { |
| "epoch": 2.2545454545454544, |
| "grad_norm": 0.0009392103287631958, |
| "learning_rate": 3.4680134680134676e-07, |
| "logits/generated": 4.1535139083862305, |
| "logits/real": 3.8194522857666016, |
| "logps/generated": -1222.1712646484375, |
| "logps/real": -433.2972106933594, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -51.39905548095703, |
| "rewards/margins": 48.590370178222656, |
| "rewards/real": -2.808685541152954, |
| "step": 620 |
| }, |
| { |
| "epoch": 2.290909090909091, |
| "grad_norm": 0.0054695000716611505, |
| "learning_rate": 3.434343434343434e-07, |
| "logits/generated": 4.002943992614746, |
| "logits/real": 3.655449628829956, |
| "logps/generated": -1462.0904541015625, |
| "logps/real": -442.56365966796875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -60.3794059753418, |
| "rewards/margins": 57.272552490234375, |
| "rewards/real": -3.106855630874634, |
| "step": 630 |
| }, |
| { |
| "epoch": 2.327272727272727, |
| "grad_norm": 0.011303853467808675, |
| "learning_rate": 3.4006734006734006e-07, |
| "logits/generated": 3.58341908454895, |
| "logits/real": 3.253676176071167, |
| "logps/generated": -1504.0076904296875, |
| "logps/real": -466.627685546875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -62.34798049926758, |
| "rewards/margins": 59.16129684448242, |
| "rewards/real": -3.1866836547851562, |
| "step": 640 |
| }, |
| { |
| "epoch": 2.3636363636363638, |
| "grad_norm": 0.0009684613032072848, |
| "learning_rate": 3.3670033670033673e-07, |
| "logits/generated": 4.34421443939209, |
| "logits/real": 3.901693820953369, |
| "logps/generated": -1832.802490234375, |
| "logps/real": -452.336181640625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -76.98825073242188, |
| "rewards/margins": 74.16451263427734, |
| "rewards/real": -2.823739767074585, |
| "step": 650 |
| }, |
| { |
| "epoch": 2.4, |
| "grad_norm": 0.00016207364423209973, |
| "learning_rate": 3.333333333333333e-07, |
| "logits/generated": 3.6365978717803955, |
| "logits/real": 3.4315154552459717, |
| "logps/generated": -1143.705078125, |
| "logps/real": -484.4818420410156, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -48.88311004638672, |
| "rewards/margins": 45.60661315917969, |
| "rewards/real": -3.2764992713928223, |
| "step": 660 |
| }, |
| { |
| "epoch": 2.4363636363636365, |
| "grad_norm": 0.010502550164526083, |
| "learning_rate": 3.2996632996633e-07, |
| "logits/generated": 3.862056016921997, |
| "logits/real": 3.24928617477417, |
| "logps/generated": -1533.475830078125, |
| "logps/real": -453.9549255371094, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -63.62640380859375, |
| "rewards/margins": 60.355133056640625, |
| "rewards/real": -3.271270275115967, |
| "step": 670 |
| }, |
| { |
| "epoch": 2.4727272727272727, |
| "grad_norm": 0.0002909304249002409, |
| "learning_rate": 3.265993265993266e-07, |
| "logits/generated": 3.5648250579833984, |
| "logits/real": 3.6869399547576904, |
| "logps/generated": -912.8480224609375, |
| "logps/real": -476.197509765625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -40.917945861816406, |
| "rewards/margins": 37.436744689941406, |
| "rewards/real": -3.4812045097351074, |
| "step": 680 |
| }, |
| { |
| "epoch": 2.509090909090909, |
| "grad_norm": 0.00032313616431939676, |
| "learning_rate": 3.2323232323232327e-07, |
| "logits/generated": 3.8342528343200684, |
| "logits/real": 3.8705894947052, |
| "logps/generated": -854.0946044921875, |
| "logps/real": -459.92529296875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -38.8852653503418, |
| "rewards/margins": 35.70844650268555, |
| "rewards/real": -3.1768181324005127, |
| "step": 690 |
| }, |
| { |
| "epoch": 2.5454545454545454, |
| "grad_norm": 0.0021219902906988715, |
| "learning_rate": 3.1986531986531984e-07, |
| "logits/generated": 3.783543109893799, |
| "logits/real": 3.4980130195617676, |
| "logps/generated": -1084.9097900390625, |
| "logps/real": -487.141845703125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -46.62445068359375, |
| "rewards/margins": 43.05109786987305, |
| "rewards/real": -3.5733554363250732, |
| "step": 700 |
| }, |
| { |
| "epoch": 2.581818181818182, |
| "grad_norm": 0.0014337911425730536, |
| "learning_rate": 3.164983164983165e-07, |
| "logits/generated": 3.6583492755889893, |
| "logits/real": 3.749937057495117, |
| "logps/generated": -1316.029052734375, |
| "logps/real": -508.31683349609375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -56.47169876098633, |
| "rewards/margins": 52.8726806640625, |
| "rewards/real": -3.5990149974823, |
| "step": 710 |
| }, |
| { |
| "epoch": 2.618181818181818, |
| "grad_norm": 0.0028510300745991563, |
| "learning_rate": 3.1313131313131313e-07, |
| "logits/generated": 3.9335789680480957, |
| "logits/real": 3.828601837158203, |
| "logps/generated": -1295.112060546875, |
| "logps/real": -437.5403747558594, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -56.59820556640625, |
| "rewards/margins": 53.75188064575195, |
| "rewards/real": -2.846327066421509, |
| "step": 720 |
| }, |
| { |
| "epoch": 2.6545454545454543, |
| "grad_norm": 0.003930759640413619, |
| "learning_rate": 3.0976430976430975e-07, |
| "logits/generated": 3.507559299468994, |
| "logits/real": 3.472404956817627, |
| "logps/generated": -1357.832275390625, |
| "logps/real": -477.1338806152344, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -57.733192443847656, |
| "rewards/margins": 54.20581817626953, |
| "rewards/real": -3.527372360229492, |
| "step": 730 |
| }, |
| { |
| "epoch": 2.690909090909091, |
| "grad_norm": 0.0017241967233385743, |
| "learning_rate": 3.063973063973064e-07, |
| "logits/generated": 3.7288355827331543, |
| "logits/real": 3.64813232421875, |
| "logps/generated": -1540.7183837890625, |
| "logps/real": -501.73480224609375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -63.22703170776367, |
| "rewards/margins": 59.9117546081543, |
| "rewards/real": -3.3152732849121094, |
| "step": 740 |
| }, |
| { |
| "epoch": 2.7272727272727275, |
| "grad_norm": 0.007579448915312288, |
| "learning_rate": 3.0303030303030305e-07, |
| "logits/generated": 3.859321117401123, |
| "logits/real": 3.7037596702575684, |
| "logps/generated": -1236.6575927734375, |
| "logps/real": -546.4544067382812, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -51.599647521972656, |
| "rewards/margins": 47.99446105957031, |
| "rewards/real": -3.605181932449341, |
| "step": 750 |
| }, |
| { |
| "epoch": 2.7636363636363637, |
| "grad_norm": 0.00014409344199114584, |
| "learning_rate": 2.9966329966329967e-07, |
| "logits/generated": 3.5292091369628906, |
| "logits/real": 3.565258741378784, |
| "logps/generated": -1531.6552734375, |
| "logps/real": -448.81268310546875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -65.52954864501953, |
| "rewards/margins": 62.135093688964844, |
| "rewards/real": -3.394456386566162, |
| "step": 760 |
| }, |
| { |
| "epoch": 2.8, |
| "grad_norm": 0.0013684000564848744, |
| "learning_rate": 2.962962962962963e-07, |
| "logits/generated": 4.109580039978027, |
| "logits/real": 3.457909345626831, |
| "logps/generated": -1383.8349609375, |
| "logps/real": -425.76324462890625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -59.45878982543945, |
| "rewards/margins": 56.1872444152832, |
| "rewards/real": -3.2715511322021484, |
| "step": 770 |
| }, |
| { |
| "epoch": 2.8363636363636364, |
| "grad_norm": 0.005777678000195546, |
| "learning_rate": 2.929292929292929e-07, |
| "logits/generated": 3.7321064472198486, |
| "logits/real": 3.8243510723114014, |
| "logps/generated": -1196.93115234375, |
| "logps/real": -456.53228759765625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -51.86643600463867, |
| "rewards/margins": 48.43678665161133, |
| "rewards/real": -3.429652452468872, |
| "step": 780 |
| }, |
| { |
| "epoch": 2.8727272727272726, |
| "grad_norm": 0.0009321131480379054, |
| "learning_rate": 2.895622895622896e-07, |
| "logits/generated": 3.882500171661377, |
| "logits/real": 3.6975486278533936, |
| "logps/generated": -1355.469482421875, |
| "logps/real": -484.16058349609375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -60.43659591674805, |
| "rewards/margins": 56.8776741027832, |
| "rewards/real": -3.558910369873047, |
| "step": 790 |
| }, |
| { |
| "epoch": 2.909090909090909, |
| "grad_norm": 0.001991138099404004, |
| "learning_rate": 2.8619528619528615e-07, |
| "logits/generated": 3.718451976776123, |
| "logits/real": 3.651914596557617, |
| "logps/generated": -1605.8145751953125, |
| "logps/real": -450.0298767089844, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -70.13053131103516, |
| "rewards/margins": 66.21720886230469, |
| "rewards/real": -3.9133262634277344, |
| "step": 800 |
| }, |
| { |
| "epoch": 2.9454545454545453, |
| "grad_norm": 7.359425704162107e-05, |
| "learning_rate": 2.8282828282828283e-07, |
| "logits/generated": 3.7316231727600098, |
| "logits/real": 3.4421298503875732, |
| "logps/generated": -1398.3856201171875, |
| "logps/real": -456.47723388671875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -61.99951171875, |
| "rewards/margins": 58.09382247924805, |
| "rewards/real": -3.905689239501953, |
| "step": 810 |
| }, |
| { |
| "epoch": 2.981818181818182, |
| "grad_norm": 0.0001666930627991202, |
| "learning_rate": 2.7946127946127945e-07, |
| "logits/generated": 3.904003858566284, |
| "logits/real": 3.768629789352417, |
| "logps/generated": -1664.2711181640625, |
| "logps/real": -424.12860107421875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -72.53042602539062, |
| "rewards/margins": 69.21732330322266, |
| "rewards/real": -3.313105821609497, |
| "step": 820 |
| }, |
| { |
| "epoch": 3.018181818181818, |
| "grad_norm": 0.0015541320296847036, |
| "learning_rate": 2.760942760942761e-07, |
| "logits/generated": 4.119956970214844, |
| "logits/real": 4.344512939453125, |
| "logps/generated": -1267.0272216796875, |
| "logps/real": -452.1845703125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -55.162574768066406, |
| "rewards/margins": 51.5660514831543, |
| "rewards/real": -3.5965213775634766, |
| "step": 830 |
| }, |
| { |
| "epoch": 3.0545454545454547, |
| "grad_norm": 0.00011859762362161128, |
| "learning_rate": 2.727272727272727e-07, |
| "logits/generated": 4.090231895446777, |
| "logits/real": 4.092189788818359, |
| "logps/generated": -1745.3656005859375, |
| "logps/real": -447.81494140625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -74.62300872802734, |
| "rewards/margins": 70.8793716430664, |
| "rewards/real": -3.7436347007751465, |
| "step": 840 |
| }, |
| { |
| "epoch": 3.090909090909091, |
| "grad_norm": 0.0006239804571906667, |
| "learning_rate": 2.6936026936026936e-07, |
| "logits/generated": 3.666217803955078, |
| "logits/real": 3.422201633453369, |
| "logps/generated": -1149.569580078125, |
| "logps/real": -458.9976501464844, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -51.963539123535156, |
| "rewards/margins": 48.148681640625, |
| "rewards/real": -3.8148579597473145, |
| "step": 850 |
| }, |
| { |
| "epoch": 3.1272727272727274, |
| "grad_norm": 0.00015905022058128955, |
| "learning_rate": 2.65993265993266e-07, |
| "logits/generated": 4.04958963394165, |
| "logits/real": 3.5136730670928955, |
| "logps/generated": -1752.6058349609375, |
| "logps/real": -387.2957458496094, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -75.6007308959961, |
| "rewards/margins": 71.88221740722656, |
| "rewards/real": -3.7185134887695312, |
| "step": 860 |
| }, |
| { |
| "epoch": 3.1636363636363636, |
| "grad_norm": 0.002004494912082122, |
| "learning_rate": 2.6262626262626266e-07, |
| "logits/generated": 4.12989616394043, |
| "logits/real": 3.6296894550323486, |
| "logps/generated": -1105.5179443359375, |
| "logps/real": -386.453369140625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -48.736148834228516, |
| "rewards/margins": 45.57777786254883, |
| "rewards/real": -3.1583666801452637, |
| "step": 870 |
| }, |
| { |
| "epoch": 3.2, |
| "grad_norm": 0.0012155564568384315, |
| "learning_rate": 2.5925925925925923e-07, |
| "logits/generated": 3.8862648010253906, |
| "logits/real": 3.694721221923828, |
| "logps/generated": -1257.2886962890625, |
| "logps/real": -369.3799743652344, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -54.6549186706543, |
| "rewards/margins": 51.277061462402344, |
| "rewards/real": -3.377854824066162, |
| "step": 880 |
| }, |
| { |
| "epoch": 3.2363636363636363, |
| "grad_norm": 0.006589071175646788, |
| "learning_rate": 2.558922558922559e-07, |
| "logits/generated": 3.9840781688690186, |
| "logits/real": 3.4879355430603027, |
| "logps/generated": -1746.123779296875, |
| "logps/real": -440.9857482910156, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -76.70772552490234, |
| "rewards/margins": 72.97891998291016, |
| "rewards/real": -3.7288193702697754, |
| "step": 890 |
| }, |
| { |
| "epoch": 3.2727272727272725, |
| "grad_norm": 0.0030722271284426565, |
| "learning_rate": 2.525252525252525e-07, |
| "logits/generated": 3.954289674758911, |
| "logits/real": 3.8279597759246826, |
| "logps/generated": -1498.404296875, |
| "logps/real": -479.84222412109375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -65.67853546142578, |
| "rewards/margins": 61.83612060546875, |
| "rewards/real": -3.8424181938171387, |
| "step": 900 |
| }, |
| { |
| "epoch": 3.309090909090909, |
| "grad_norm": 0.0001753130329635927, |
| "learning_rate": 2.4915824915824914e-07, |
| "logits/generated": 4.019095420837402, |
| "logits/real": 3.8157622814178467, |
| "logps/generated": -1326.3746337890625, |
| "logps/real": -370.5709228515625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -59.56660842895508, |
| "rewards/margins": 55.939353942871094, |
| "rewards/real": -3.6272518634796143, |
| "step": 910 |
| }, |
| { |
| "epoch": 3.3454545454545457, |
| "grad_norm": 0.004363807101820759, |
| "learning_rate": 2.4579124579124576e-07, |
| "logits/generated": 4.095259189605713, |
| "logits/real": 3.8618171215057373, |
| "logps/generated": -1519.919189453125, |
| "logps/real": -453.9547424316406, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -67.54315948486328, |
| "rewards/margins": 63.63945388793945, |
| "rewards/real": -3.9037089347839355, |
| "step": 920 |
| }, |
| { |
| "epoch": 3.381818181818182, |
| "grad_norm": 0.0014407534499616213, |
| "learning_rate": 2.4242424242424244e-07, |
| "logits/generated": 3.8879623413085938, |
| "logits/real": 3.7041187286376953, |
| "logps/generated": -1519.233154296875, |
| "logps/real": -458.47625732421875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -65.14590454101562, |
| "rewards/margins": 61.69196701049805, |
| "rewards/real": -3.453932285308838, |
| "step": 930 |
| }, |
| { |
| "epoch": 3.418181818181818, |
| "grad_norm": 0.00018436218899847752, |
| "learning_rate": 2.3905723905723906e-07, |
| "logits/generated": 3.6360068321228027, |
| "logits/real": 3.7878594398498535, |
| "logps/generated": -1373.73828125, |
| "logps/real": -496.204345703125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -61.42437744140625, |
| "rewards/margins": 57.20280075073242, |
| "rewards/real": -4.221576690673828, |
| "step": 940 |
| }, |
| { |
| "epoch": 3.4545454545454546, |
| "grad_norm": 9.983852335117753e-05, |
| "learning_rate": 2.3569023569023568e-07, |
| "logits/generated": 3.9673848152160645, |
| "logits/real": 4.219111919403076, |
| "logps/generated": -1223.6982421875, |
| "logps/real": -411.6839904785156, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -54.2238655090332, |
| "rewards/margins": 50.91444778442383, |
| "rewards/real": -3.309422731399536, |
| "step": 950 |
| }, |
| { |
| "epoch": 3.4909090909090907, |
| "grad_norm": 0.0007948451957228228, |
| "learning_rate": 2.323232323232323e-07, |
| "logits/generated": 3.7442917823791504, |
| "logits/real": 3.6274819374084473, |
| "logps/generated": -1369.765625, |
| "logps/real": -449.767578125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -61.11466598510742, |
| "rewards/margins": 57.37229537963867, |
| "rewards/real": -3.742368698120117, |
| "step": 960 |
| }, |
| { |
| "epoch": 3.5272727272727273, |
| "grad_norm": 0.0030600399757966723, |
| "learning_rate": 2.2895622895622895e-07, |
| "logits/generated": 3.8217289447784424, |
| "logits/real": 3.576320171356201, |
| "logps/generated": -1459.0333251953125, |
| "logps/real": -448.5374450683594, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -63.0662727355957, |
| "rewards/margins": 59.22290802001953, |
| "rewards/real": -3.8433589935302734, |
| "step": 970 |
| }, |
| { |
| "epoch": 3.5636363636363635, |
| "grad_norm": 0.004163869838731672, |
| "learning_rate": 2.2558922558922557e-07, |
| "logits/generated": 4.298056125640869, |
| "logits/real": 3.4841365814208984, |
| "logps/generated": -1740.793701171875, |
| "logps/real": -441.3580017089844, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -75.46671295166016, |
| "rewards/margins": 71.69257354736328, |
| "rewards/real": -3.7741341590881348, |
| "step": 980 |
| }, |
| { |
| "epoch": 3.6, |
| "grad_norm": 0.0019080007177829318, |
| "learning_rate": 2.222222222222222e-07, |
| "logits/generated": 3.853611469268799, |
| "logits/real": 3.6462883949279785, |
| "logps/generated": -1441.735595703125, |
| "logps/real": -473.94879150390625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -63.321319580078125, |
| "rewards/margins": 59.3939094543457, |
| "rewards/real": -3.92741322517395, |
| "step": 990 |
| }, |
| { |
| "epoch": 3.6363636363636362, |
| "grad_norm": 0.000704839046472418, |
| "learning_rate": 2.1885521885521884e-07, |
| "logits/generated": 4.106793403625488, |
| "logits/real": 3.7004337310791016, |
| "logps/generated": -1413.923583984375, |
| "logps/real": -472.50238037109375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -62.2875862121582, |
| "rewards/margins": 58.44181442260742, |
| "rewards/real": -3.8457672595977783, |
| "step": 1000 |
| }, |
| { |
| "epoch": 3.672727272727273, |
| "grad_norm": 0.000858452546126466, |
| "learning_rate": 2.1548821548821546e-07, |
| "logits/generated": 4.079923629760742, |
| "logits/real": 3.8820438385009766, |
| "logps/generated": -1516.7833251953125, |
| "logps/real": -461.62078857421875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -67.4074478149414, |
| "rewards/margins": 63.33385467529297, |
| "rewards/real": -4.073586940765381, |
| "step": 1010 |
| }, |
| { |
| "epoch": 3.709090909090909, |
| "grad_norm": 0.0010031363875996075, |
| "learning_rate": 2.121212121212121e-07, |
| "logits/generated": 3.767723798751831, |
| "logits/real": 3.8114075660705566, |
| "logps/generated": -1460.832275390625, |
| "logps/real": -497.56201171875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -64.61216735839844, |
| "rewards/margins": 60.64111328125, |
| "rewards/real": -3.971052885055542, |
| "step": 1020 |
| }, |
| { |
| "epoch": 3.7454545454545456, |
| "grad_norm": 0.0024116334439129978, |
| "learning_rate": 2.0875420875420873e-07, |
| "logits/generated": 4.043444633483887, |
| "logits/real": 3.8120369911193848, |
| "logps/generated": -1331.077880859375, |
| "logps/real": -454.9960021972656, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -59.663169860839844, |
| "rewards/margins": 55.51136016845703, |
| "rewards/real": -4.151822090148926, |
| "step": 1030 |
| }, |
| { |
| "epoch": 3.7818181818181817, |
| "grad_norm": 0.0015199595459823992, |
| "learning_rate": 2.0538720538720538e-07, |
| "logits/generated": 3.829423189163208, |
| "logits/real": 3.5104575157165527, |
| "logps/generated": -1270.5301513671875, |
| "logps/real": -429.19122314453125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -55.3685417175293, |
| "rewards/margins": 51.80549240112305, |
| "rewards/real": -3.5630505084991455, |
| "step": 1040 |
| }, |
| { |
| "epoch": 3.8181818181818183, |
| "grad_norm": 0.0005563199954904515, |
| "learning_rate": 2.02020202020202e-07, |
| "logits/generated": 3.9071640968322754, |
| "logits/real": 3.7608370780944824, |
| "logps/generated": -1778.2320556640625, |
| "logps/real": -423.9949645996094, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -80.55699157714844, |
| "rewards/margins": 76.7548828125, |
| "rewards/real": -3.802103042602539, |
| "step": 1050 |
| }, |
| { |
| "epoch": 3.8545454545454545, |
| "grad_norm": 0.004755420237394011, |
| "learning_rate": 1.9865319865319864e-07, |
| "logits/generated": 4.1636857986450195, |
| "logits/real": 3.379361629486084, |
| "logps/generated": -1448.9886474609375, |
| "logps/real": -389.80328369140625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -65.82633209228516, |
| "rewards/margins": 62.258392333984375, |
| "rewards/real": -3.5679428577423096, |
| "step": 1060 |
| }, |
| { |
| "epoch": 3.8909090909090907, |
| "grad_norm": 3.783155401402485e-05, |
| "learning_rate": 1.9528619528619527e-07, |
| "logits/generated": 3.940641403198242, |
| "logits/real": 4.008017063140869, |
| "logps/generated": -1480.460693359375, |
| "logps/real": -451.5081481933594, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -66.01498413085938, |
| "rewards/margins": 62.06439971923828, |
| "rewards/real": -3.9505832195281982, |
| "step": 1070 |
| }, |
| { |
| "epoch": 3.9272727272727272, |
| "grad_norm": 0.0012735410485099623, |
| "learning_rate": 1.9191919191919189e-07, |
| "logits/generated": 3.90619158744812, |
| "logits/real": 3.8133018016815186, |
| "logps/generated": -1917.876708984375, |
| "logps/real": -466.3075256347656, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -87.14842224121094, |
| "rewards/margins": 83.11375427246094, |
| "rewards/real": -4.034660816192627, |
| "step": 1080 |
| }, |
| { |
| "epoch": 3.963636363636364, |
| "grad_norm": 0.0007351500072224141, |
| "learning_rate": 1.8855218855218853e-07, |
| "logits/generated": 3.9110474586486816, |
| "logits/real": 3.5808777809143066, |
| "logps/generated": -1701.5618896484375, |
| "logps/real": -473.091064453125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -75.6848373413086, |
| "rewards/margins": 71.86578369140625, |
| "rewards/real": -3.8190536499023438, |
| "step": 1090 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 5.6591036032243704e-05, |
| "learning_rate": 1.8518518518518516e-07, |
| "logits/generated": 3.9563984870910645, |
| "logits/real": 3.6396121978759766, |
| "logps/generated": -1587.2015380859375, |
| "logps/real": -424.1458435058594, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -74.37104797363281, |
| "rewards/margins": 70.43859100341797, |
| "rewards/real": -3.9324498176574707, |
| "step": 1100 |
| }, |
| { |
| "epoch": 4.036363636363636, |
| "grad_norm": 0.0004206570610130193, |
| "learning_rate": 1.818181818181818e-07, |
| "logits/generated": 3.7221055030822754, |
| "logits/real": 3.831826686859131, |
| "logps/generated": -1685.308349609375, |
| "logps/real": -519.84130859375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -74.87862396240234, |
| "rewards/margins": 70.57380676269531, |
| "rewards/real": -4.304826736450195, |
| "step": 1110 |
| }, |
| { |
| "epoch": 4.072727272727272, |
| "grad_norm": 0.001160372097406167, |
| "learning_rate": 1.7845117845117842e-07, |
| "logits/generated": 3.6212050914764404, |
| "logits/real": 3.7864627838134766, |
| "logps/generated": -1583.37158203125, |
| "logps/real": -511.188232421875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -74.45861053466797, |
| "rewards/margins": 69.92201232910156, |
| "rewards/real": -4.53659725189209, |
| "step": 1120 |
| }, |
| { |
| "epoch": 4.109090909090909, |
| "grad_norm": 8.539568574380086e-05, |
| "learning_rate": 1.7508417508417507e-07, |
| "logits/generated": 3.8307089805603027, |
| "logits/real": 3.671238422393799, |
| "logps/generated": -1238.8209228515625, |
| "logps/real": -452.064208984375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -55.9775505065918, |
| "rewards/margins": 52.53010940551758, |
| "rewards/real": -3.4474411010742188, |
| "step": 1130 |
| }, |
| { |
| "epoch": 4.1454545454545455, |
| "grad_norm": 0.0008660641077104146, |
| "learning_rate": 1.717171717171717e-07, |
| "logits/generated": 3.8134942054748535, |
| "logits/real": 3.990009307861328, |
| "logps/generated": -1304.6759033203125, |
| "logps/real": -445.03826904296875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -59.402122497558594, |
| "rewards/margins": 55.251441955566406, |
| "rewards/real": -4.150691032409668, |
| "step": 1140 |
| }, |
| { |
| "epoch": 4.181818181818182, |
| "grad_norm": 0.0001382904157899484, |
| "learning_rate": 1.6835016835016837e-07, |
| "logits/generated": 4.140042304992676, |
| "logits/real": 3.885005474090576, |
| "logps/generated": -1764.208984375, |
| "logps/real": -455.87921142578125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -79.93865203857422, |
| "rewards/margins": 75.69930267333984, |
| "rewards/real": -4.239354133605957, |
| "step": 1150 |
| }, |
| { |
| "epoch": 4.218181818181818, |
| "grad_norm": 0.0004243586857810705, |
| "learning_rate": 1.64983164983165e-07, |
| "logits/generated": 3.5448012351989746, |
| "logits/real": 3.717078447341919, |
| "logps/generated": -1162.357666015625, |
| "logps/real": -518.4683227539062, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -51.90681076049805, |
| "rewards/margins": 47.858604431152344, |
| "rewards/real": -4.048200607299805, |
| "step": 1160 |
| }, |
| { |
| "epoch": 4.254545454545455, |
| "grad_norm": 0.000987239513688741, |
| "learning_rate": 1.6161616161616163e-07, |
| "logits/generated": 4.015206336975098, |
| "logits/real": 3.729823350906372, |
| "logps/generated": -2077.96533203125, |
| "logps/real": -495.60015869140625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -91.87440490722656, |
| "rewards/margins": 87.61203002929688, |
| "rewards/real": -4.262367248535156, |
| "step": 1170 |
| }, |
| { |
| "epoch": 4.290909090909091, |
| "grad_norm": 0.00037696538230314736, |
| "learning_rate": 1.5824915824915826e-07, |
| "logits/generated": 3.7414791584014893, |
| "logits/real": 3.636801242828369, |
| "logps/generated": -1549.16796875, |
| "logps/real": -531.6744384765625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -70.65663146972656, |
| "rewards/margins": 65.79484558105469, |
| "rewards/real": -4.86180305480957, |
| "step": 1180 |
| }, |
| { |
| "epoch": 4.327272727272727, |
| "grad_norm": 0.00010522895913070764, |
| "learning_rate": 1.5488215488215488e-07, |
| "logits/generated": 3.7180187702178955, |
| "logits/real": 4.073853492736816, |
| "logps/generated": -1401.19775390625, |
| "logps/real": -431.75274658203125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -67.09441375732422, |
| "rewards/margins": 62.73823165893555, |
| "rewards/real": -4.356192111968994, |
| "step": 1190 |
| }, |
| { |
| "epoch": 4.363636363636363, |
| "grad_norm": 0.000493590978374745, |
| "learning_rate": 1.5151515151515152e-07, |
| "logits/generated": 3.5975699424743652, |
| "logits/real": 3.689765214920044, |
| "logps/generated": -1326.864501953125, |
| "logps/real": -542.3211059570312, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -59.640357971191406, |
| "rewards/margins": 55.317596435546875, |
| "rewards/real": -4.322758674621582, |
| "step": 1200 |
| }, |
| { |
| "epoch": 4.4, |
| "grad_norm": 0.00025492883366324984, |
| "learning_rate": 1.4814814814814815e-07, |
| "logits/generated": 3.959843873977661, |
| "logits/real": 3.6517300605773926, |
| "logps/generated": -1539.507080078125, |
| "logps/real": -475.3280334472656, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -69.64662170410156, |
| "rewards/margins": 65.52012634277344, |
| "rewards/real": -4.12650203704834, |
| "step": 1210 |
| }, |
| { |
| "epoch": 4.4363636363636365, |
| "grad_norm": 0.00022938298497810698, |
| "learning_rate": 1.447811447811448e-07, |
| "logits/generated": 3.842700958251953, |
| "logits/real": 3.8824355602264404, |
| "logps/generated": -1134.2059326171875, |
| "logps/real": -459.9798278808594, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -51.40458297729492, |
| "rewards/margins": 47.1477165222168, |
| "rewards/real": -4.256866931915283, |
| "step": 1220 |
| }, |
| { |
| "epoch": 4.472727272727273, |
| "grad_norm": 0.0004052529610557066, |
| "learning_rate": 1.4141414141414141e-07, |
| "logits/generated": 3.9025471210479736, |
| "logits/real": 3.9180073738098145, |
| "logps/generated": -1886.6363525390625, |
| "logps/real": -467.0265197753906, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -85.01991271972656, |
| "rewards/margins": 80.82940673828125, |
| "rewards/real": -4.190503120422363, |
| "step": 1230 |
| }, |
| { |
| "epoch": 4.509090909090909, |
| "grad_norm": 0.0005280290781930737, |
| "learning_rate": 1.3804713804713806e-07, |
| "logits/generated": 4.019129753112793, |
| "logits/real": 3.748079776763916, |
| "logps/generated": -1440.4344482421875, |
| "logps/real": -405.66583251953125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -67.43157958984375, |
| "rewards/margins": 63.11574172973633, |
| "rewards/real": -4.315838813781738, |
| "step": 1240 |
| }, |
| { |
| "epoch": 4.545454545454545, |
| "grad_norm": 5.366248566220936e-06, |
| "learning_rate": 1.3468013468013468e-07, |
| "logits/generated": 4.056360244750977, |
| "logits/real": 3.848876953125, |
| "logps/generated": -1692.2427978515625, |
| "logps/real": -400.6857604980469, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -76.63723754882812, |
| "rewards/margins": 72.62677764892578, |
| "rewards/real": -4.010465145111084, |
| "step": 1250 |
| }, |
| { |
| "epoch": 4.581818181818182, |
| "grad_norm": 0.00047402245995597456, |
| "learning_rate": 1.3131313131313133e-07, |
| "logits/generated": 3.9295859336853027, |
| "logits/real": 3.7341480255126953, |
| "logps/generated": -1847.7982177734375, |
| "logps/real": -472.1689453125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -85.64716339111328, |
| "rewards/margins": 80.90381622314453, |
| "rewards/real": -4.743343830108643, |
| "step": 1260 |
| }, |
| { |
| "epoch": 4.618181818181818, |
| "grad_norm": 0.00128487736629245, |
| "learning_rate": 1.2794612794612795e-07, |
| "logits/generated": 3.9448304176330566, |
| "logits/real": 3.9632275104522705, |
| "logps/generated": -1450.473388671875, |
| "logps/real": -479.2431640625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -66.07244110107422, |
| "rewards/margins": 61.695777893066406, |
| "rewards/real": -4.376659870147705, |
| "step": 1270 |
| }, |
| { |
| "epoch": 4.654545454545454, |
| "grad_norm": 0.00013514069974056552, |
| "learning_rate": 1.2457912457912457e-07, |
| "logits/generated": 3.9676432609558105, |
| "logits/real": 3.487008571624756, |
| "logps/generated": -1180.776123046875, |
| "logps/real": -429.3609924316406, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -57.64728927612305, |
| "rewards/margins": 53.328086853027344, |
| "rewards/real": -4.319206714630127, |
| "step": 1280 |
| }, |
| { |
| "epoch": 4.690909090909091, |
| "grad_norm": 0.0006303733396897195, |
| "learning_rate": 1.2121212121212122e-07, |
| "logits/generated": 4.023508548736572, |
| "logits/real": 3.998694658279419, |
| "logps/generated": -1272.932861328125, |
| "logps/real": -501.510986328125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -59.267669677734375, |
| "rewards/margins": 54.53044891357422, |
| "rewards/real": -4.737218379974365, |
| "step": 1290 |
| }, |
| { |
| "epoch": 4.7272727272727275, |
| "grad_norm": 1.57814039841833e-05, |
| "learning_rate": 1.1784511784511784e-07, |
| "logits/generated": 3.772751569747925, |
| "logits/real": 3.721607208251953, |
| "logps/generated": -1451.5894775390625, |
| "logps/real": -440.76751708984375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -66.0102767944336, |
| "rewards/margins": 61.77167892456055, |
| "rewards/real": -4.238595008850098, |
| "step": 1300 |
| }, |
| { |
| "epoch": 4.763636363636364, |
| "grad_norm": 6.235887123859738e-05, |
| "learning_rate": 1.1447811447811447e-07, |
| "logits/generated": 3.9907233715057373, |
| "logits/real": 3.906350612640381, |
| "logps/generated": -1149.9796142578125, |
| "logps/real": -501.718505859375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -51.936546325683594, |
| "rewards/margins": 47.381919860839844, |
| "rewards/real": -4.554632186889648, |
| "step": 1310 |
| }, |
| { |
| "epoch": 4.8, |
| "grad_norm": 0.0005202534829377769, |
| "learning_rate": 1.111111111111111e-07, |
| "logits/generated": 3.2805778980255127, |
| "logits/real": 3.3483023643493652, |
| "logps/generated": -896.7745971679688, |
| "logps/real": -473.29718017578125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -43.71897506713867, |
| "rewards/margins": 39.41306686401367, |
| "rewards/real": -4.305908203125, |
| "step": 1320 |
| }, |
| { |
| "epoch": 4.836363636363636, |
| "grad_norm": 6.406018245203921e-05, |
| "learning_rate": 1.0774410774410773e-07, |
| "logits/generated": 3.2299580574035645, |
| "logits/real": 3.425579071044922, |
| "logps/generated": -923.6653442382812, |
| "logps/real": -446.96832275390625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -45.724266052246094, |
| "rewards/margins": 41.449378967285156, |
| "rewards/real": -4.274886131286621, |
| "step": 1330 |
| }, |
| { |
| "epoch": 4.872727272727273, |
| "grad_norm": 0.00014454501073033914, |
| "learning_rate": 1.0437710437710436e-07, |
| "logits/generated": 3.5981407165527344, |
| "logits/real": 4.00681734085083, |
| "logps/generated": -918.5948486328125, |
| "logps/real": -426.62921142578125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -43.384376525878906, |
| "rewards/margins": 38.89148712158203, |
| "rewards/real": -4.492888450622559, |
| "step": 1340 |
| }, |
| { |
| "epoch": 4.909090909090909, |
| "grad_norm": 0.0006254585346169349, |
| "learning_rate": 1.01010101010101e-07, |
| "logits/generated": 3.715498685836792, |
| "logits/real": 3.8721745014190674, |
| "logps/generated": -1560.0103759765625, |
| "logps/real": -510.93927001953125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -72.44107055664062, |
| "rewards/margins": 67.83012390136719, |
| "rewards/real": -4.610942363739014, |
| "step": 1350 |
| }, |
| { |
| "epoch": 4.945454545454545, |
| "grad_norm": 0.0008769643975252069, |
| "learning_rate": 9.764309764309763e-08, |
| "logits/generated": 3.9168152809143066, |
| "logits/real": 3.5084102153778076, |
| "logps/generated": -1468.501220703125, |
| "logps/real": -501.52032470703125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -69.21226501464844, |
| "rewards/margins": 64.94023132324219, |
| "rewards/real": -4.272032737731934, |
| "step": 1360 |
| }, |
| { |
| "epoch": 4.9818181818181815, |
| "grad_norm": 0.0012166612736284428, |
| "learning_rate": 9.427609427609427e-08, |
| "logits/generated": 3.681574583053589, |
| "logits/real": 4.426842212677002, |
| "logps/generated": -1462.860595703125, |
| "logps/real": -457.4916076660156, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -68.63682556152344, |
| "rewards/margins": 64.71060180664062, |
| "rewards/real": -3.9262146949768066, |
| "step": 1370 |
| }, |
| { |
| "epoch": 5.0181818181818185, |
| "grad_norm": 0.0003307362200277528, |
| "learning_rate": 9.09090909090909e-08, |
| "logits/generated": 3.945413112640381, |
| "logits/real": 3.938251495361328, |
| "logps/generated": -1445.0692138671875, |
| "logps/real": -426.0289611816406, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -66.72593688964844, |
| "rewards/margins": 62.89970016479492, |
| "rewards/real": -3.8262367248535156, |
| "step": 1380 |
| }, |
| { |
| "epoch": 5.054545454545455, |
| "grad_norm": 0.000133121896992743, |
| "learning_rate": 8.754208754208754e-08, |
| "logits/generated": 3.6478161811828613, |
| "logits/real": 3.513685941696167, |
| "logps/generated": -1289.758056640625, |
| "logps/real": -466.66119384765625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -60.87055206298828, |
| "rewards/margins": 56.79083251953125, |
| "rewards/real": -4.07971715927124, |
| "step": 1390 |
| }, |
| { |
| "epoch": 5.090909090909091, |
| "grad_norm": 0.0006093163562234791, |
| "learning_rate": 8.417508417508418e-08, |
| "logits/generated": 3.8004722595214844, |
| "logits/real": 3.8927619457244873, |
| "logps/generated": -1247.7264404296875, |
| "logps/real": -516.7521362304688, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -59.13682174682617, |
| "rewards/margins": 54.87421417236328, |
| "rewards/real": -4.262602806091309, |
| "step": 1400 |
| }, |
| { |
| "epoch": 5.127272727272727, |
| "grad_norm": 0.0007397523574107219, |
| "learning_rate": 8.080808080808082e-08, |
| "logits/generated": 3.6465377807617188, |
| "logits/real": 3.726672649383545, |
| "logps/generated": -1440.296630859375, |
| "logps/real": -475.47283935546875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -68.43962860107422, |
| "rewards/margins": 63.84468460083008, |
| "rewards/real": -4.594945430755615, |
| "step": 1410 |
| }, |
| { |
| "epoch": 5.163636363636364, |
| "grad_norm": 0.0003301105358851929, |
| "learning_rate": 7.744107744107744e-08, |
| "logits/generated": 4.112045764923096, |
| "logits/real": 3.6040217876434326, |
| "logps/generated": -1812.4189453125, |
| "logps/real": -512.8524169921875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -84.6473617553711, |
| "rewards/margins": 80.17106628417969, |
| "rewards/real": -4.476306915283203, |
| "step": 1420 |
| }, |
| { |
| "epoch": 5.2, |
| "grad_norm": 6.415611492466389e-05, |
| "learning_rate": 7.407407407407407e-08, |
| "logits/generated": 3.957354784011841, |
| "logits/real": 4.061827659606934, |
| "logps/generated": -1631.3790283203125, |
| "logps/real": -501.46453857421875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -73.66567993164062, |
| "rewards/margins": 69.13175964355469, |
| "rewards/real": -4.533923149108887, |
| "step": 1430 |
| }, |
| { |
| "epoch": 5.236363636363636, |
| "grad_norm": 6.0549193113947094e-05, |
| "learning_rate": 7.070707070707071e-08, |
| "logits/generated": 3.871743679046631, |
| "logits/real": 3.737473249435425, |
| "logps/generated": -1661.531005859375, |
| "logps/real": -478.5572814941406, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -76.69551086425781, |
| "rewards/margins": 72.16094207763672, |
| "rewards/real": -4.534560203552246, |
| "step": 1440 |
| }, |
| { |
| "epoch": 5.2727272727272725, |
| "grad_norm": 4.5226258894634027e-05, |
| "learning_rate": 6.734006734006734e-08, |
| "logits/generated": 3.7419967651367188, |
| "logits/real": 4.010978698730469, |
| "logps/generated": -1400.2882080078125, |
| "logps/real": -471.0364685058594, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -65.46446228027344, |
| "rewards/margins": 61.05268478393555, |
| "rewards/real": -4.41178035736084, |
| "step": 1450 |
| }, |
| { |
| "epoch": 5.309090909090909, |
| "grad_norm": 0.00017288177287966272, |
| "learning_rate": 6.397306397306398e-08, |
| "logits/generated": 3.5792031288146973, |
| "logits/real": 3.6540589332580566, |
| "logps/generated": -1383.195068359375, |
| "logps/real": -501.170166015625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -64.13642883300781, |
| "rewards/margins": 59.7711296081543, |
| "rewards/real": -4.365292549133301, |
| "step": 1460 |
| }, |
| { |
| "epoch": 5.345454545454546, |
| "grad_norm": 2.4760789132422138e-05, |
| "learning_rate": 6.060606060606061e-08, |
| "logits/generated": 3.975567579269409, |
| "logits/real": 3.8065619468688965, |
| "logps/generated": -1486.880126953125, |
| "logps/real": -466.724609375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -69.9581298828125, |
| "rewards/margins": 64.9647445678711, |
| "rewards/real": -4.993378162384033, |
| "step": 1470 |
| }, |
| { |
| "epoch": 5.381818181818182, |
| "grad_norm": 0.0005286526078682218, |
| "learning_rate": 5.723905723905724e-08, |
| "logits/generated": 3.6327576637268066, |
| "logits/real": 3.6307568550109863, |
| "logps/generated": -984.1024169921875, |
| "logps/real": -434.69171142578125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -47.59702682495117, |
| "rewards/margins": 43.50556182861328, |
| "rewards/real": -4.091464519500732, |
| "step": 1480 |
| }, |
| { |
| "epoch": 5.418181818181818, |
| "grad_norm": 0.0005548002338588267, |
| "learning_rate": 5.3872053872053865e-08, |
| "logits/generated": 3.9311721324920654, |
| "logits/real": 3.8737149238586426, |
| "logps/generated": -1967.173828125, |
| "logps/real": -438.94915771484375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -94.0683364868164, |
| "rewards/margins": 89.73558807373047, |
| "rewards/real": -4.332737922668457, |
| "step": 1490 |
| }, |
| { |
| "epoch": 5.454545454545454, |
| "grad_norm": 0.00022247062485129725, |
| "learning_rate": 5.05050505050505e-08, |
| "logits/generated": 3.693833827972412, |
| "logits/real": 3.730213165283203, |
| "logps/generated": -1566.4849853515625, |
| "logps/real": -507.9705505371094, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -74.78486633300781, |
| "rewards/margins": 69.65180206298828, |
| "rewards/real": -5.133058071136475, |
| "step": 1500 |
| }, |
| { |
| "epoch": 5.490909090909091, |
| "grad_norm": 0.00014823349149112444, |
| "learning_rate": 4.7138047138047134e-08, |
| "logits/generated": 4.056867599487305, |
| "logits/real": 3.9997782707214355, |
| "logps/generated": -1185.4552001953125, |
| "logps/real": -487.16693115234375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -56.62416458129883, |
| "rewards/margins": 52.01531982421875, |
| "rewards/real": -4.608843803405762, |
| "step": 1510 |
| }, |
| { |
| "epoch": 5.527272727272727, |
| "grad_norm": 0.0002456658050929149, |
| "learning_rate": 4.377104377104377e-08, |
| "logits/generated": 3.7220072746276855, |
| "logits/real": 3.7891058921813965, |
| "logps/generated": -1615.204833984375, |
| "logps/real": -501.6162109375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -76.18266296386719, |
| "rewards/margins": 71.68074035644531, |
| "rewards/real": -4.50192403793335, |
| "step": 1520 |
| }, |
| { |
| "epoch": 5.5636363636363635, |
| "grad_norm": 0.0006055537891314523, |
| "learning_rate": 4.040404040404041e-08, |
| "logits/generated": 3.833317995071411, |
| "logits/real": 3.712054491043091, |
| "logps/generated": -1525.6534423828125, |
| "logps/real": -478.32720947265625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -70.79685974121094, |
| "rewards/margins": 66.09513854980469, |
| "rewards/real": -4.701727390289307, |
| "step": 1530 |
| }, |
| { |
| "epoch": 5.6, |
| "grad_norm": 6.131079180902863e-05, |
| "learning_rate": 3.7037037037037036e-08, |
| "logits/generated": 3.6760215759277344, |
| "logits/real": 3.605074405670166, |
| "logps/generated": -1132.375732421875, |
| "logps/real": -488.8179626464844, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -54.268714904785156, |
| "rewards/margins": 49.204002380371094, |
| "rewards/real": -5.064711093902588, |
| "step": 1540 |
| }, |
| { |
| "epoch": 5.636363636363637, |
| "grad_norm": 8.1504638837413e-05, |
| "learning_rate": 3.367003367003367e-08, |
| "logits/generated": 3.844359874725342, |
| "logits/real": 3.853445529937744, |
| "logps/generated": -1274.287353515625, |
| "logps/real": -484.49102783203125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -60.99640655517578, |
| "rewards/margins": 55.981048583984375, |
| "rewards/real": -5.015349388122559, |
| "step": 1550 |
| }, |
| { |
| "epoch": 5.672727272727273, |
| "grad_norm": 0.00024942794031913856, |
| "learning_rate": 3.0303030303030305e-08, |
| "logits/generated": 3.7670280933380127, |
| "logits/real": 3.671916961669922, |
| "logps/generated": -1370.51611328125, |
| "logps/real": -470.8055114746094, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -66.9756088256836, |
| "rewards/margins": 62.2355842590332, |
| "rewards/real": -4.740030765533447, |
| "step": 1560 |
| }, |
| { |
| "epoch": 5.709090909090909, |
| "grad_norm": 0.0001492803952467262, |
| "learning_rate": 2.6936026936026933e-08, |
| "logits/generated": 3.5178914070129395, |
| "logits/real": 3.622554063796997, |
| "logps/generated": -1014.978515625, |
| "logps/real": -516.9625244140625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -50.696678161621094, |
| "rewards/margins": 45.91947555541992, |
| "rewards/real": -4.777202129364014, |
| "step": 1570 |
| }, |
| { |
| "epoch": 5.745454545454545, |
| "grad_norm": 0.0006824322144998329, |
| "learning_rate": 2.3569023569023567e-08, |
| "logits/generated": 3.5519561767578125, |
| "logits/real": 3.5069594383239746, |
| "logps/generated": -1010.9691162109375, |
| "logps/real": -470.0237731933594, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -53.67352294921875, |
| "rewards/margins": 49.31608963012695, |
| "rewards/real": -4.357430934906006, |
| "step": 1580 |
| }, |
| { |
| "epoch": 5.781818181818182, |
| "grad_norm": 0.000186341266970004, |
| "learning_rate": 2.0202020202020204e-08, |
| "logits/generated": 3.713832139968872, |
| "logits/real": 3.9890835285186768, |
| "logps/generated": -1338.298583984375, |
| "logps/real": -486.15679931640625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -65.81551361083984, |
| "rewards/margins": 61.16944122314453, |
| "rewards/real": -4.646066665649414, |
| "step": 1590 |
| }, |
| { |
| "epoch": 5.818181818181818, |
| "grad_norm": 0.00025122871712599865, |
| "learning_rate": 1.6835016835016835e-08, |
| "logits/generated": 3.836996555328369, |
| "logits/real": 3.388111114501953, |
| "logps/generated": -1553.741455078125, |
| "logps/real": -477.65765380859375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -75.15855407714844, |
| "rewards/margins": 70.58387756347656, |
| "rewards/real": -4.574671268463135, |
| "step": 1600 |
| }, |
| { |
| "epoch": 5.8545454545454545, |
| "grad_norm": 0.00011562643100957245, |
| "learning_rate": 1.3468013468013466e-08, |
| "logits/generated": 4.382861614227295, |
| "logits/real": 3.5439746379852295, |
| "logps/generated": -2293.033935546875, |
| "logps/real": -465.66790771484375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -106.3143081665039, |
| "rewards/margins": 102.07095336914062, |
| "rewards/real": -4.243343830108643, |
| "step": 1610 |
| }, |
| { |
| "epoch": 5.890909090909091, |
| "grad_norm": 0.0005696740525733873, |
| "learning_rate": 1.0101010101010102e-08, |
| "logits/generated": 3.699605941772461, |
| "logits/real": 3.266279697418213, |
| "logps/generated": -1303.4429931640625, |
| "logps/real": -535.8048095703125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -64.61190795898438, |
| "rewards/margins": 59.79120635986328, |
| "rewards/real": -4.820700645446777, |
| "step": 1620 |
| }, |
| { |
| "epoch": 5.927272727272728, |
| "grad_norm": 0.00021057172282241802, |
| "learning_rate": 6.734006734006733e-09, |
| "logits/generated": 3.7417564392089844, |
| "logits/real": 3.6985526084899902, |
| "logps/generated": -1704.074462890625, |
| "logps/real": -515.9856567382812, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -78.1255111694336, |
| "rewards/margins": 73.12936401367188, |
| "rewards/real": -4.99615478515625, |
| "step": 1630 |
| }, |
| { |
| "epoch": 5.963636363636364, |
| "grad_norm": 0.0004341167683210516, |
| "learning_rate": 3.3670033670033666e-09, |
| "logits/generated": 3.8730645179748535, |
| "logits/real": 3.839846134185791, |
| "logps/generated": -1266.2689208984375, |
| "logps/real": -423.4774475097656, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -60.56571578979492, |
| "rewards/margins": 56.362342834472656, |
| "rewards/real": -4.20337438583374, |
| "step": 1640 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 0.0004156988432161806, |
| "learning_rate": 0.0, |
| "logits/generated": 3.8449695110321045, |
| "logits/real": 3.563386917114258, |
| "logps/generated": -1621.296142578125, |
| "logps/real": -509.37469482421875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/generated": -76.79975891113281, |
| "rewards/margins": 70.92039489746094, |
| "rewards/real": -5.879361629486084, |
| "step": 1650 |
| }, |
| { |
| "epoch": 6.0, |
| "step": 1650, |
| "total_flos": 0.0, |
| "train_loss": 0.012667954623552208, |
| "train_runtime": 21434.6493, |
| "train_samples_per_second": 4.927, |
| "train_steps_per_second": 0.077 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 1650, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 6, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|