| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.9973828840617638, | |
| "eval_steps": 500, | |
| "global_step": 954, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "diff_generated": -1.8149629831314087, | |
| "epoch": 0.002093692750588851, | |
| "grad_norm": 43.26649304714989, | |
| "learning_rate": 2.083333333333333e-08, | |
| "logits/chosen": -2.1441590785980225, | |
| "logits/rejected": -2.0543735027313232, | |
| "logps/chosen": -276.82366943359375, | |
| "logps/rejected": -131.32485961914062, | |
| "logps_avg/chosen": -1.2310187816619873, | |
| "logps_avg/rejected": -0.5444889068603516, | |
| "loss": 0.9706, | |
| "losses_ref": -0.2554703652858734, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 1, | |
| "u": -1.129564642906189, | |
| "weight": 0.727432131767273 | |
| }, | |
| { | |
| "diff_generated": -2.051100015640259, | |
| "epoch": 0.010468463752944255, | |
| "grad_norm": 36.895500460127934, | |
| "learning_rate": 1.0416666666666667e-07, | |
| "logits/chosen": -2.2114098072052, | |
| "logits/rejected": -2.10967755317688, | |
| "logps/chosen": -280.6037902832031, | |
| "logps/rejected": -162.30044555664062, | |
| "logps_avg/chosen": -1.178394079208374, | |
| "logps_avg/rejected": -0.6153301000595093, | |
| "loss": 0.8456, | |
| "losses_ref": -0.2878931164741516, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 5, | |
| "u": -1.3192780017852783, | |
| "weight": 0.6589411497116089 | |
| }, | |
| { | |
| "diff_generated": -2.0342957973480225, | |
| "epoch": 0.02093692750588851, | |
| "grad_norm": 42.24412669427099, | |
| "learning_rate": 2.0833333333333333e-07, | |
| "logits/chosen": -2.3565850257873535, | |
| "logits/rejected": -2.1584813594818115, | |
| "logps/chosen": -300.6426086425781, | |
| "logps/rejected": -167.40040588378906, | |
| "logps_avg/chosen": -1.1184991598129272, | |
| "logps_avg/rejected": -0.6102887988090515, | |
| "loss": 0.8731, | |
| "losses_ref": -0.2850458025932312, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 10, | |
| "u": -1.2951091527938843, | |
| "weight": 0.6724194884300232 | |
| }, | |
| { | |
| "diff_generated": -1.9851667881011963, | |
| "epoch": 0.031405391258832765, | |
| "grad_norm": 31.267399626309693, | |
| "learning_rate": 3.1249999999999997e-07, | |
| "logits/chosen": -2.2946715354919434, | |
| "logits/rejected": -2.146397113800049, | |
| "logps/chosen": -293.4947509765625, | |
| "logps/rejected": -156.3843994140625, | |
| "logps_avg/chosen": -1.0986683368682861, | |
| "logps_avg/rejected": -0.5955500602722168, | |
| "loss": 0.7379, | |
| "losses_ref": -0.28325891494750977, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 15, | |
| "u": -1.2593215703964233, | |
| "weight": 0.6894552111625671 | |
| }, | |
| { | |
| "diff_generated": -2.0035815238952637, | |
| "epoch": 0.04187385501177702, | |
| "grad_norm": 22.686346023577535, | |
| "learning_rate": 4.1666666666666667e-07, | |
| "logits/chosen": -2.2586379051208496, | |
| "logits/rejected": -2.134080410003662, | |
| "logps/chosen": -261.52960205078125, | |
| "logps/rejected": -161.9304656982422, | |
| "logps_avg/chosen": -0.9046722650527954, | |
| "logps_avg/rejected": -0.6010745763778687, | |
| "loss": 0.5984, | |
| "losses_ref": -0.2947906255722046, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 20, | |
| "u": -1.323677659034729, | |
| "weight": 0.6650992631912231 | |
| }, | |
| { | |
| "diff_generated": -3.258924961090088, | |
| "epoch": 0.05234231876472128, | |
| "grad_norm": 15.412617135483135, | |
| "learning_rate": 5.208333333333334e-07, | |
| "logits/chosen": -2.1527328491210938, | |
| "logits/rejected": -2.013265609741211, | |
| "logps/chosen": -257.1512756347656, | |
| "logps/rejected": -277.85711669921875, | |
| "logps_avg/chosen": -0.8043298721313477, | |
| "logps_avg/rejected": -0.9776775240898132, | |
| "loss": 0.5813, | |
| "losses_ref": -0.25987568497657776, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 25, | |
| "u": -1.7415921688079834, | |
| "weight": 0.4334268569946289 | |
| }, | |
| { | |
| "diff_generated": -6.022626876831055, | |
| "epoch": 0.06281078251766553, | |
| "grad_norm": 15.25952740077981, | |
| "learning_rate": 6.249999999999999e-07, | |
| "logits/chosen": -2.1849024295806885, | |
| "logits/rejected": -2.1174261569976807, | |
| "logps/chosen": -248.16909790039062, | |
| "logps/rejected": -534.7174682617188, | |
| "logps_avg/chosen": -0.8181886672973633, | |
| "logps_avg/rejected": -1.8067880868911743, | |
| "loss": 0.667, | |
| "losses_ref": -0.1500019133090973, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 30, | |
| "u": -2.0229365825653076, | |
| "weight": 0.225816011428833 | |
| }, | |
| { | |
| "diff_generated": -9.153361320495605, | |
| "epoch": 0.07327924627060979, | |
| "grad_norm": 18.48300356782214, | |
| "learning_rate": 7.291666666666666e-07, | |
| "logits/chosen": -2.2708792686462402, | |
| "logits/rejected": -2.130821704864502, | |
| "logps/chosen": -255.21701049804688, | |
| "logps/rejected": -782.3409423828125, | |
| "logps_avg/chosen": -0.7904274463653564, | |
| "logps_avg/rejected": -2.7460083961486816, | |
| "loss": 0.6695, | |
| "losses_ref": -0.1412452608346939, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 35, | |
| "u": -2.0066444873809814, | |
| "weight": 0.2316206991672516 | |
| }, | |
| { | |
| "diff_generated": -13.209306716918945, | |
| "epoch": 0.08374771002355404, | |
| "grad_norm": 11.436173876886219, | |
| "learning_rate": 8.333333333333333e-07, | |
| "logits/chosen": -2.2111456394195557, | |
| "logits/rejected": -2.13924241065979, | |
| "logps/chosen": -241.15072631835938, | |
| "logps/rejected": -1223.218017578125, | |
| "logps_avg/chosen": -0.7820993661880493, | |
| "logps_avg/rejected": -3.962791919708252, | |
| "loss": 0.6798, | |
| "losses_ref": -0.09846386313438416, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 40, | |
| "u": -2.131727457046509, | |
| "weight": 0.1441923826932907 | |
| }, | |
| { | |
| "diff_generated": -14.63012409210205, | |
| "epoch": 0.0942161737764983, | |
| "grad_norm": 59.29532742939981, | |
| "learning_rate": 9.374999999999999e-07, | |
| "logits/chosen": -2.298741102218628, | |
| "logits/rejected": -2.0653302669525146, | |
| "logps/chosen": -264.97357177734375, | |
| "logps/rejected": -1320.9332275390625, | |
| "logps_avg/chosen": -0.779043436050415, | |
| "logps_avg/rejected": -4.389036655426025, | |
| "loss": 0.6914, | |
| "losses_ref": -0.08891113847494125, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 45, | |
| "u": -2.13495135307312, | |
| "weight": 0.13693246245384216 | |
| }, | |
| { | |
| "diff_generated": -12.911537170410156, | |
| "epoch": 0.10468463752944256, | |
| "grad_norm": 8.930786410211843, | |
| "learning_rate": 1.0416666666666667e-06, | |
| "logits/chosen": -2.302333116531372, | |
| "logits/rejected": -2.2043356895446777, | |
| "logps/chosen": -241.756103515625, | |
| "logps/rejected": -1145.5604248046875, | |
| "logps_avg/chosen": -0.7927433252334595, | |
| "logps_avg/rejected": -3.8734612464904785, | |
| "loss": 0.6993, | |
| "losses_ref": -0.10359562933444977, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 50, | |
| "u": -2.1199097633361816, | |
| "weight": 0.15450677275657654 | |
| }, | |
| { | |
| "diff_generated": -11.095788955688477, | |
| "epoch": 0.11515310128238682, | |
| "grad_norm": 9.783120635378207, | |
| "learning_rate": 1.1458333333333333e-06, | |
| "logits/chosen": -2.4609317779541016, | |
| "logits/rejected": -2.3575634956359863, | |
| "logps/chosen": -245.7393798828125, | |
| "logps/rejected": -981.2423095703125, | |
| "logps_avg/chosen": -0.8303758502006531, | |
| "logps_avg/rejected": -3.3287365436553955, | |
| "loss": 0.6926, | |
| "losses_ref": -0.08979364484548569, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 55, | |
| "u": -2.17197322845459, | |
| "weight": 0.11447404325008392 | |
| }, | |
| { | |
| "diff_generated": -13.795969009399414, | |
| "epoch": 0.12562156503533106, | |
| "grad_norm": 9.420248973366883, | |
| "learning_rate": 1.2499999999999999e-06, | |
| "logits/chosen": -2.5860393047332764, | |
| "logits/rejected": -2.482574939727783, | |
| "logps/chosen": -249.44070434570312, | |
| "logps/rejected": -1232.59228515625, | |
| "logps_avg/chosen": -0.7758530378341675, | |
| "logps_avg/rejected": -4.138791084289551, | |
| "loss": 0.6815, | |
| "losses_ref": -0.0876917839050293, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 60, | |
| "u": -2.1477303504943848, | |
| "weight": 0.12934879958629608 | |
| }, | |
| { | |
| "diff_generated": -16.25264549255371, | |
| "epoch": 0.1360900287882753, | |
| "grad_norm": 12.24868452539092, | |
| "learning_rate": 1.3541666666666667e-06, | |
| "logits/chosen": -2.640986204147339, | |
| "logits/rejected": -2.510274648666382, | |
| "logps/chosen": -258.56109619140625, | |
| "logps/rejected": -1508.2763671875, | |
| "logps_avg/chosen": -0.7998191118240356, | |
| "logps_avg/rejected": -4.875794410705566, | |
| "loss": 0.7039, | |
| "losses_ref": -0.07322683185338974, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 65, | |
| "u": -2.180368423461914, | |
| "weight": 0.1039782166481018 | |
| }, | |
| { | |
| "diff_generated": -16.121641159057617, | |
| "epoch": 0.14655849254121958, | |
| "grad_norm": 7.905374307014113, | |
| "learning_rate": 1.4583333333333333e-06, | |
| "logits/chosen": -2.581535816192627, | |
| "logits/rejected": -2.4923813343048096, | |
| "logps/chosen": -238.9574432373047, | |
| "logps/rejected": -1444.403564453125, | |
| "logps_avg/chosen": -0.8027188181877136, | |
| "logps_avg/rejected": -4.836493015289307, | |
| "loss": 0.6907, | |
| "losses_ref": -0.0750691220164299, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 70, | |
| "u": -2.189579486846924, | |
| "weight": 0.09880717098712921 | |
| }, | |
| { | |
| "diff_generated": -16.705251693725586, | |
| "epoch": 0.15702695629416383, | |
| "grad_norm": 9.573720561122785, | |
| "learning_rate": 1.5624999999999999e-06, | |
| "logits/chosen": -2.598374128341675, | |
| "logits/rejected": -2.446035146713257, | |
| "logps/chosen": -270.2249450683594, | |
| "logps/rejected": -1517.441650390625, | |
| "logps_avg/chosen": -0.7964105606079102, | |
| "logps_avg/rejected": -5.011575698852539, | |
| "loss": 0.725, | |
| "losses_ref": -0.07196028530597687, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 75, | |
| "u": -2.21059513092041, | |
| "weight": 0.08612708002328873 | |
| }, | |
| { | |
| "diff_generated": -18.304201126098633, | |
| "epoch": 0.16749542004710807, | |
| "grad_norm": 7.0924424799681, | |
| "learning_rate": 1.6666666666666667e-06, | |
| "logits/chosen": -2.591045618057251, | |
| "logits/rejected": -2.489673376083374, | |
| "logps/chosen": -216.99685668945312, | |
| "logps/rejected": -1667.5283203125, | |
| "logps_avg/chosen": -0.7215350866317749, | |
| "logps_avg/rejected": -5.491259574890137, | |
| "loss": 0.6699, | |
| "losses_ref": -0.06580645591020584, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 80, | |
| "u": -2.2118382453918457, | |
| "weight": 0.08225957304239273 | |
| }, | |
| { | |
| "diff_generated": -18.906076431274414, | |
| "epoch": 0.17796388380005235, | |
| "grad_norm": 7.632608732109636, | |
| "learning_rate": 1.7708333333333332e-06, | |
| "logits/chosen": -2.6046338081359863, | |
| "logits/rejected": -2.4658734798431396, | |
| "logps/chosen": -244.0012664794922, | |
| "logps/rejected": -1689.686767578125, | |
| "logps_avg/chosen": -0.7541030049324036, | |
| "logps_avg/rejected": -5.671823978424072, | |
| "loss": 0.7032, | |
| "losses_ref": -0.06257248669862747, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 85, | |
| "u": -2.2184884548187256, | |
| "weight": 0.07795710116624832 | |
| }, | |
| { | |
| "diff_generated": -22.26788902282715, | |
| "epoch": 0.1884323475529966, | |
| "grad_norm": 10.332533231863795, | |
| "learning_rate": 1.8749999999999998e-06, | |
| "logits/chosen": -2.62504243850708, | |
| "logits/rejected": -2.4670681953430176, | |
| "logps/chosen": -241.73550415039062, | |
| "logps/rejected": -1991.0435791015625, | |
| "logps_avg/chosen": -0.7270082235336304, | |
| "logps_avg/rejected": -6.680366516113281, | |
| "loss": 0.689, | |
| "losses_ref": -0.06023075059056282, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 90, | |
| "u": -2.2096261978149414, | |
| "weight": 0.08131252229213715 | |
| }, | |
| { | |
| "diff_generated": -20.242061614990234, | |
| "epoch": 0.19890081130594087, | |
| "grad_norm": 7.009998646854354, | |
| "learning_rate": 1.9791666666666666e-06, | |
| "logits/chosen": -2.5733673572540283, | |
| "logits/rejected": -2.4526114463806152, | |
| "logps/chosen": -241.0827178955078, | |
| "logps/rejected": -1833.453369140625, | |
| "logps_avg/chosen": -0.7628769278526306, | |
| "logps_avg/rejected": -6.07261848449707, | |
| "loss": 0.6963, | |
| "losses_ref": -0.06475149095058441, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 95, | |
| "u": -2.2028064727783203, | |
| "weight": 0.0875387191772461 | |
| }, | |
| { | |
| "diff_generated": -20.439355850219727, | |
| "epoch": 0.2093692750588851, | |
| "grad_norm": 8.018231688525765, | |
| "learning_rate": 1.9998927475076105e-06, | |
| "logits/chosen": -2.621689558029175, | |
| "logits/rejected": -2.470346689224243, | |
| "logps/chosen": -245.5767059326172, | |
| "logps/rejected": -1799.0728759765625, | |
| "logps_avg/chosen": -0.7319446802139282, | |
| "logps_avg/rejected": -6.13180685043335, | |
| "loss": 0.713, | |
| "losses_ref": -0.06253904104232788, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 100, | |
| "u": -2.2177913188934326, | |
| "weight": 0.07825066894292831 | |
| }, | |
| { | |
| "diff_generated": -20.04744529724121, | |
| "epoch": 0.21983773881182936, | |
| "grad_norm": 7.248502316485956, | |
| "learning_rate": 1.9994570736865402e-06, | |
| "logits/chosen": -2.5862081050872803, | |
| "logits/rejected": -2.4370968341827393, | |
| "logps/chosen": -236.89501953125, | |
| "logps/rejected": -1794.0465087890625, | |
| "logps_avg/chosen": -0.7266777753829956, | |
| "logps_avg/rejected": -6.01423454284668, | |
| "loss": 0.6834, | |
| "losses_ref": -0.06446884572505951, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 105, | |
| "u": -2.198464870452881, | |
| "weight": 0.0902954638004303 | |
| }, | |
| { | |
| "diff_generated": -20.10696792602539, | |
| "epoch": 0.23030620256477363, | |
| "grad_norm": 6.989545794085033, | |
| "learning_rate": 1.9986864211644068e-06, | |
| "logits/chosen": -2.570603609085083, | |
| "logits/rejected": -2.431187391281128, | |
| "logps/chosen": -236.31884765625, | |
| "logps/rejected": -1773.07421875, | |
| "logps_avg/chosen": -0.7348344326019287, | |
| "logps_avg/rejected": -6.032090187072754, | |
| "loss": 0.6907, | |
| "losses_ref": -0.06961078941822052, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 110, | |
| "u": -2.2041425704956055, | |
| "weight": 0.08867262303829193 | |
| }, | |
| { | |
| "diff_generated": -21.548114776611328, | |
| "epoch": 0.24077466631771788, | |
| "grad_norm": 8.060053280392543, | |
| "learning_rate": 1.997581048233623e-06, | |
| "logits/chosen": -2.581951141357422, | |
| "logits/rejected": -2.4441328048706055, | |
| "logps/chosen": -232.8576202392578, | |
| "logps/rejected": -1942.4847412109375, | |
| "logps_avg/chosen": -0.7739059329032898, | |
| "logps_avg/rejected": -6.4644341468811035, | |
| "loss": 0.6817, | |
| "losses_ref": -0.062096286565065384, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 115, | |
| "u": -2.216289520263672, | |
| "weight": 0.07720647752285004 | |
| }, | |
| { | |
| "diff_generated": -20.77760887145996, | |
| "epoch": 0.2512431300706621, | |
| "grad_norm": 6.53936940072868, | |
| "learning_rate": 1.9961413253717214e-06, | |
| "logits/chosen": -2.610959768295288, | |
| "logits/rejected": -2.4528729915618896, | |
| "logps/chosen": -233.8311004638672, | |
| "logps/rejected": -1862.2890625, | |
| "logps_avg/chosen": -0.7324265837669373, | |
| "logps_avg/rejected": -6.233283519744873, | |
| "loss": 0.6932, | |
| "losses_ref": -0.0750860795378685, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 120, | |
| "u": -2.200193405151367, | |
| "weight": 0.09466435015201569 | |
| }, | |
| { | |
| "diff_generated": -23.185279846191406, | |
| "epoch": 0.26171159382360637, | |
| "grad_norm": 7.018169897249557, | |
| "learning_rate": 1.994367735117177e-06, | |
| "logits/chosen": -2.5702836513519287, | |
| "logits/rejected": -2.391747236251831, | |
| "logps/chosen": -220.02792358398438, | |
| "logps/rejected": -2155.526123046875, | |
| "logps_avg/chosen": -0.7447048425674438, | |
| "logps_avg/rejected": -6.955584526062012, | |
| "loss": 0.7052, | |
| "losses_ref": -0.05986471846699715, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 125, | |
| "u": -2.1955928802490234, | |
| "weight": 0.08941423892974854 | |
| }, | |
| { | |
| "diff_generated": -22.66459846496582, | |
| "epoch": 0.2721800575765506, | |
| "grad_norm": 31.410489955444024, | |
| "learning_rate": 1.992260871907687e-06, | |
| "logits/chosen": -2.567049503326416, | |
| "logits/rejected": -2.4223153591156006, | |
| "logps/chosen": -242.8145751953125, | |
| "logps/rejected": -2053.98388671875, | |
| "logps_avg/chosen": -0.7978746294975281, | |
| "logps_avg/rejected": -6.799378871917725, | |
| "loss": 0.7155, | |
| "losses_ref": -0.04843521863222122, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 130, | |
| "u": -2.239774465560913, | |
| "weight": 0.05849189683794975 | |
| }, | |
| { | |
| "diff_generated": -23.263744354248047, | |
| "epoch": 0.2826485213294949, | |
| "grad_norm": 7.49886026826363, | |
| "learning_rate": 1.9898214418809326e-06, | |
| "logits/chosen": -2.532973289489746, | |
| "logits/rejected": -2.372011423110962, | |
| "logps/chosen": -241.5897674560547, | |
| "logps/rejected": -2110.734375, | |
| "logps_avg/chosen": -0.7454018592834473, | |
| "logps_avg/rejected": -6.979123592376709, | |
| "loss": 0.6961, | |
| "losses_ref": -0.04879006743431091, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 135, | |
| "u": -2.2302093505859375, | |
| "weight": 0.06326891481876373 | |
| }, | |
| { | |
| "diff_generated": -22.754619598388672, | |
| "epoch": 0.29311698508243916, | |
| "grad_norm": 7.014311333863948, | |
| "learning_rate": 1.9870502626379126e-06, | |
| "logits/chosen": -2.488236904144287, | |
| "logits/rejected": -2.361851215362549, | |
| "logps/chosen": -234.2844696044922, | |
| "logps/rejected": -2074.984375, | |
| "logps_avg/chosen": -0.7961763143539429, | |
| "logps_avg/rejected": -6.826386451721191, | |
| "loss": 0.7285, | |
| "losses_ref": -0.055333297699689865, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 140, | |
| "u": -2.2265305519104004, | |
| "weight": 0.06895061582326889 | |
| }, | |
| { | |
| "diff_generated": -20.225128173828125, | |
| "epoch": 0.3035854488353834, | |
| "grad_norm": 6.7478341009341865, | |
| "learning_rate": 1.983948262968915e-06, | |
| "logits/chosen": -2.5856704711914062, | |
| "logits/rejected": -2.4371695518493652, | |
| "logps/chosen": -263.78900146484375, | |
| "logps/rejected": -1824.1302490234375, | |
| "logps_avg/chosen": -0.7517282366752625, | |
| "logps_avg/rejected": -6.067538738250732, | |
| "loss": 0.6839, | |
| "losses_ref": -0.06395243108272552, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 145, | |
| "u": -2.2037534713745117, | |
| "weight": 0.08503635227680206 | |
| }, | |
| { | |
| "diff_generated": -24.050996780395508, | |
| "epoch": 0.31405391258832765, | |
| "grad_norm": 7.353090756036984, | |
| "learning_rate": 1.9805164825422237e-06, | |
| "logits/chosen": -2.607673168182373, | |
| "logits/rejected": -2.408552646636963, | |
| "logps/chosen": -241.8136749267578, | |
| "logps/rejected": -2169.62353515625, | |
| "logps_avg/chosen": -0.7578203082084656, | |
| "logps_avg/rejected": -7.215299129486084, | |
| "loss": 0.6958, | |
| "losses_ref": -0.05395021289587021, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 150, | |
| "u": -2.234814167022705, | |
| "weight": 0.06379680335521698 | |
| }, | |
| { | |
| "diff_generated": -23.94837188720703, | |
| "epoch": 0.3245223763412719, | |
| "grad_norm": 7.484499798723553, | |
| "learning_rate": 1.9767560715556594e-06, | |
| "logits/chosen": -2.5357837677001953, | |
| "logits/rejected": -2.3741650581359863, | |
| "logps/chosen": -237.78701782226562, | |
| "logps/rejected": -2074.5205078125, | |
| "logps_avg/chosen": -0.7676432132720947, | |
| "logps_avg/rejected": -7.184511661529541, | |
| "loss": 0.7199, | |
| "losses_ref": -0.044619906693696976, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 155, | |
| "u": -2.2300286293029785, | |
| "weight": 0.061775337904691696 | |
| }, | |
| { | |
| "diff_generated": -24.210857391357422, | |
| "epoch": 0.33499084009421615, | |
| "grad_norm": 7.8117370330190115, | |
| "learning_rate": 1.972668290351084e-06, | |
| "logits/chosen": -2.532038688659668, | |
| "logits/rejected": -2.3655738830566406, | |
| "logps/chosen": -246.5824432373047, | |
| "logps/rejected": -2090.85693359375, | |
| "logps_avg/chosen": -0.7575558423995972, | |
| "logps_avg/rejected": -7.2632575035095215, | |
| "loss": 0.6939, | |
| "losses_ref": -0.04590834304690361, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 160, | |
| "u": -2.236487865447998, | |
| "weight": 0.05844121426343918 | |
| }, | |
| { | |
| "diff_generated": -20.957683563232422, | |
| "epoch": 0.34545930384716045, | |
| "grad_norm": 7.4058662270815026, | |
| "learning_rate": 1.968254508991978e-06, | |
| "logits/chosen": -2.6238338947296143, | |
| "logits/rejected": -2.4566922187805176, | |
| "logps/chosen": -245.81436157226562, | |
| "logps/rejected": -1895.0390625, | |
| "logps_avg/chosen": -0.7605465054512024, | |
| "logps_avg/rejected": -6.2873053550720215, | |
| "loss": 0.701, | |
| "losses_ref": -0.05409424751996994, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 165, | |
| "u": -2.236602783203125, | |
| "weight": 0.0619116947054863 | |
| }, | |
| { | |
| "diff_generated": -23.36783218383789, | |
| "epoch": 0.3559277676001047, | |
| "grad_norm": 7.74288657614709, | |
| "learning_rate": 1.9635162068042544e-06, | |
| "logits/chosen": -2.5531725883483887, | |
| "logits/rejected": -2.385223627090454, | |
| "logps/chosen": -250.6099090576172, | |
| "logps/rejected": -2106.687744140625, | |
| "logps_avg/chosen": -0.7441612482070923, | |
| "logps_avg/rejected": -7.010349273681641, | |
| "loss": 0.7035, | |
| "losses_ref": -0.060589499771595, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 170, | |
| "u": -2.218136787414551, | |
| "weight": 0.0771271213889122 | |
| }, | |
| { | |
| "diff_generated": -23.426584243774414, | |
| "epoch": 0.36639623135304894, | |
| "grad_norm": 6.175218562127925, | |
| "learning_rate": 1.958454971880441e-06, | |
| "logits/chosen": -2.545517683029175, | |
| "logits/rejected": -2.3892464637756348, | |
| "logps/chosen": -271.62152099609375, | |
| "logps/rejected": -2128.689208984375, | |
| "logps_avg/chosen": -0.7712885141372681, | |
| "logps_avg/rejected": -7.027975559234619, | |
| "loss": 0.6768, | |
| "losses_ref": -0.059747565537691116, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 175, | |
| "u": -2.221135377883911, | |
| "weight": 0.07428421080112457 | |
| }, | |
| { | |
| "diff_generated": -23.27652931213379, | |
| "epoch": 0.3768646951059932, | |
| "grad_norm": 7.602114045248552, | |
| "learning_rate": 1.9530725005474194e-06, | |
| "logits/chosen": -2.5965559482574463, | |
| "logits/rejected": -2.4581873416900635, | |
| "logps/chosen": -225.35818481445312, | |
| "logps/rejected": -2096.1943359375, | |
| "logps_avg/chosen": -0.7377344369888306, | |
| "logps_avg/rejected": -6.982959747314453, | |
| "loss": 0.6599, | |
| "losses_ref": -0.06142450496554375, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 180, | |
| "u": -2.224907875061035, | |
| "weight": 0.07244168221950531 | |
| }, | |
| { | |
| "diff_generated": -24.591943740844727, | |
| "epoch": 0.38733315885893743, | |
| "grad_norm": 6.781608060052273, | |
| "learning_rate": 1.9473705967978807e-06, | |
| "logits/chosen": -2.6047005653381348, | |
| "logits/rejected": -2.4540090560913086, | |
| "logps/chosen": -231.2947235107422, | |
| "logps/rejected": -2179.2568359375, | |
| "logps_avg/chosen": -0.689501166343689, | |
| "logps_avg/rejected": -7.3775835037231445, | |
| "loss": 0.6665, | |
| "losses_ref": -0.05740996077656746, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 185, | |
| "u": -2.2239882946014404, | |
| "weight": 0.07182185351848602 | |
| }, | |
| { | |
| "diff_generated": -25.36248016357422, | |
| "epoch": 0.39780162261188173, | |
| "grad_norm": 10.641404317565371, | |
| "learning_rate": 1.941351171685697e-06, | |
| "logits/chosen": -2.5710506439208984, | |
| "logits/rejected": -2.4436774253845215, | |
| "logps/chosen": -236.1158905029297, | |
| "logps/rejected": -2273.37158203125, | |
| "logps_avg/chosen": -0.7929750680923462, | |
| "logps_avg/rejected": -7.6087446212768555, | |
| "loss": 0.7108, | |
| "losses_ref": -0.05253469944000244, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 190, | |
| "u": -2.239004373550415, | |
| "weight": 0.06010523438453674 | |
| }, | |
| { | |
| "diff_generated": -25.077518463134766, | |
| "epoch": 0.408270086364826, | |
| "grad_norm": 9.470830241427814, | |
| "learning_rate": 1.9350162426854148e-06, | |
| "logits/chosen": -2.602252244949341, | |
| "logits/rejected": -2.4661412239074707, | |
| "logps/chosen": -197.88571166992188, | |
| "logps/rejected": -2272.28076171875, | |
| "logps_avg/chosen": -0.7630836367607117, | |
| "logps_avg/rejected": -7.523255348205566, | |
| "loss": 0.6999, | |
| "losses_ref": -0.04595743492245674, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 195, | |
| "u": -2.243717670440674, | |
| "weight": 0.054233819246292114 | |
| }, | |
| { | |
| "diff_generated": -24.682910919189453, | |
| "epoch": 0.4187385501177702, | |
| "grad_norm": 6.269041714690376, | |
| "learning_rate": 1.9283679330160725e-06, | |
| "logits/chosen": -2.5849337577819824, | |
| "logits/rejected": -2.394373655319214, | |
| "logps/chosen": -242.97378540039062, | |
| "logps/rejected": -2224.541015625, | |
| "logps_avg/chosen": -0.7199097871780396, | |
| "logps_avg/rejected": -7.404873847961426, | |
| "loss": 0.69, | |
| "losses_ref": -0.0516563281416893, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 200, | |
| "u": -2.2307353019714355, | |
| "weight": 0.06507831811904907 | |
| }, | |
| { | |
| "diff_generated": -22.525114059448242, | |
| "epoch": 0.42920701387071447, | |
| "grad_norm": 6.963251924926938, | |
| "learning_rate": 1.9214084709295847e-06, | |
| "logits/chosen": -2.6382362842559814, | |
| "logits/rejected": -2.4577651023864746, | |
| "logps/chosen": -259.39349365234375, | |
| "logps/rejected": -2065.585693359375, | |
| "logps_avg/chosen": -0.7225343585014343, | |
| "logps_avg/rejected": -6.757534027099609, | |
| "loss": 0.696, | |
| "losses_ref": -0.05577712133526802, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 205, | |
| "u": -2.2293906211853027, | |
| "weight": 0.0664394274353981 | |
| }, | |
| { | |
| "diff_generated": -22.145648956298828, | |
| "epoch": 0.4396754776236587, | |
| "grad_norm": 7.299076527075288, | |
| "learning_rate": 1.9141401889639164e-06, | |
| "logits/chosen": -2.5583319664001465, | |
| "logits/rejected": -2.4039664268493652, | |
| "logps/chosen": -238.9542694091797, | |
| "logps/rejected": -2062.404541015625, | |
| "logps_avg/chosen": -0.7716320753097534, | |
| "logps_avg/rejected": -6.6436944007873535, | |
| "loss": 0.6993, | |
| "losses_ref": -0.058913152664899826, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 210, | |
| "u": -2.2152769565582275, | |
| "weight": 0.07614172250032425 | |
| }, | |
| { | |
| "diff_generated": -23.579111099243164, | |
| "epoch": 0.45014394137660296, | |
| "grad_norm": 8.50842985439364, | |
| "learning_rate": 1.906565523161312e-06, | |
| "logits/chosen": -2.600001335144043, | |
| "logits/rejected": -2.4590041637420654, | |
| "logps/chosen": -231.87673950195312, | |
| "logps/rejected": -2083.391357421875, | |
| "logps_avg/chosen": -0.7907384634017944, | |
| "logps_avg/rejected": -7.073732852935791, | |
| "loss": 0.7066, | |
| "losses_ref": -0.05489416792988777, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 215, | |
| "u": -2.234017848968506, | |
| "weight": 0.06394322961568832 | |
| }, | |
| { | |
| "diff_generated": -27.941696166992188, | |
| "epoch": 0.46061240512954726, | |
| "grad_norm": 6.305196829448204, | |
| "learning_rate": 1.8986870122518259e-06, | |
| "logits/chosen": -2.6018145084381104, | |
| "logits/rejected": -2.436535358428955, | |
| "logps/chosen": -245.06005859375, | |
| "logps/rejected": -2555.211181640625, | |
| "logps_avg/chosen": -0.7695084810256958, | |
| "logps_avg/rejected": -8.382509231567383, | |
| "loss": 0.7137, | |
| "losses_ref": -0.04443511739373207, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 220, | |
| "u": -2.2481765747070312, | |
| "weight": 0.051543742418289185 | |
| }, | |
| { | |
| "diff_generated": -26.58075523376465, | |
| "epoch": 0.4710808688824915, | |
| "grad_norm": 6.961879634992629, | |
| "learning_rate": 1.8905072968024423e-06, | |
| "logits/chosen": -2.567117214202881, | |
| "logits/rejected": -2.3789048194885254, | |
| "logps/chosen": -244.94296264648438, | |
| "logps/rejected": -2428.1923828125, | |
| "logps_avg/chosen": -0.7622503042221069, | |
| "logps_avg/rejected": -7.974226474761963, | |
| "loss": 0.6936, | |
| "losses_ref": -0.04088358208537102, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 225, | |
| "u": -2.2515604496002197, | |
| "weight": 0.04799002408981323 | |
| }, | |
| { | |
| "diff_generated": -24.84002113342285, | |
| "epoch": 0.48154933263543576, | |
| "grad_norm": 7.1763831101881275, | |
| "learning_rate": 1.88202911833206e-06, | |
| "logits/chosen": -2.520597219467163, | |
| "logits/rejected": -2.395034074783325, | |
| "logps/chosen": -213.36929321289062, | |
| "logps/rejected": -2192.75390625, | |
| "logps_avg/chosen": -0.7349015474319458, | |
| "logps_avg/rejected": -7.4520063400268555, | |
| "loss": 0.6978, | |
| "losses_ref": -0.051292240619659424, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 230, | |
| "u": -2.231480360031128, | |
| "weight": 0.06503967195749283 | |
| }, | |
| { | |
| "diff_generated": -26.721317291259766, | |
| "epoch": 0.49201779638838, | |
| "grad_norm": 5.9191842237687835, | |
| "learning_rate": 1.873255318392644e-06, | |
| "logits/chosen": -2.4896910190582275, | |
| "logits/rejected": -2.296112060546875, | |
| "logps/chosen": -239.5654296875, | |
| "logps/rejected": -2448.593505859375, | |
| "logps_avg/chosen": -0.7563043236732483, | |
| "logps_avg/rejected": -8.01639461517334, | |
| "loss": 0.7163, | |
| "losses_ref": -0.05161570757627487, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 235, | |
| "u": -2.230043649673462, | |
| "weight": 0.06615348160266876 | |
| }, | |
| { | |
| "diff_generated": -22.361705780029297, | |
| "epoch": 0.5024862601413242, | |
| "grad_norm": 6.264520814093759, | |
| "learning_rate": 1.8641888376168483e-06, | |
| "logits/chosen": -2.4571125507354736, | |
| "logits/rejected": -2.3177151679992676, | |
| "logps/chosen": -219.5469207763672, | |
| "logps/rejected": -1993.8834228515625, | |
| "logps_avg/chosen": -0.7551349997520447, | |
| "logps_avg/rejected": -6.708512783050537, | |
| "loss": 0.7049, | |
| "losses_ref": -0.05244841426610947, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 240, | |
| "u": -2.2336738109588623, | |
| "weight": 0.06469963490962982 | |
| }, | |
| { | |
| "diff_generated": -19.673988342285156, | |
| "epoch": 0.5129547238942685, | |
| "grad_norm": 6.373155717275301, | |
| "learning_rate": 1.8548327147324312e-06, | |
| "logits/chosen": -2.459257125854492, | |
| "logits/rejected": -2.273050546646118, | |
| "logps/chosen": -248.42935180664062, | |
| "logps/rejected": -1772.5706787109375, | |
| "logps_avg/chosen": -0.7812148928642273, | |
| "logps_avg/rejected": -5.902197360992432, | |
| "loss": 0.6961, | |
| "losses_ref": -0.0656919851899147, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 245, | |
| "u": -2.2100472450256348, | |
| "weight": 0.08213107287883759 | |
| }, | |
| { | |
| "diff_generated": -18.21377182006836, | |
| "epoch": 0.5234231876472127, | |
| "grad_norm": 7.015946672120974, | |
| "learning_rate": 1.8451900855437948e-06, | |
| "logits/chosen": -2.4628689289093018, | |
| "logits/rejected": -2.322192430496216, | |
| "logps/chosen": -242.85488891601562, | |
| "logps/rejected": -1614.31201171875, | |
| "logps_avg/chosen": -0.7260557413101196, | |
| "logps_avg/rejected": -5.4641313552856445, | |
| "loss": 0.6754, | |
| "losses_ref": -0.05365673825144768, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 250, | |
| "u": -2.234142303466797, | |
| "weight": 0.06292648613452911 | |
| }, | |
| { | |
| "diff_generated": -21.006351470947266, | |
| "epoch": 0.533891651400157, | |
| "grad_norm": 6.444057235727556, | |
| "learning_rate": 1.8352641818809846e-06, | |
| "logits/chosen": -2.44881010055542, | |
| "logits/rejected": -2.264845371246338, | |
| "logps/chosen": -258.3345031738281, | |
| "logps/rejected": -1910.637451171875, | |
| "logps_avg/chosen": -0.7704434394836426, | |
| "logps_avg/rejected": -6.301905155181885, | |
| "loss": 0.6922, | |
| "losses_ref": -0.05841520428657532, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 255, | |
| "u": -2.230961322784424, | |
| "weight": 0.06754828989505768 | |
| }, | |
| { | |
| "diff_generated": -21.22915267944336, | |
| "epoch": 0.5443601151531012, | |
| "grad_norm": 6.052398997240752, | |
| "learning_rate": 1.8250583305165094e-06, | |
| "logits/chosen": -2.3323371410369873, | |
| "logits/rejected": -2.212430477142334, | |
| "logps/chosen": -235.18038940429688, | |
| "logps/rejected": -1926.814453125, | |
| "logps_avg/chosen": -0.6792945861816406, | |
| "logps_avg/rejected": -6.368745803833008, | |
| "loss": 0.6742, | |
| "losses_ref": -0.047284115105867386, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 260, | |
| "u": -2.220738172531128, | |
| "weight": 0.06843873858451843 | |
| }, | |
| { | |
| "diff_generated": -21.301851272583008, | |
| "epoch": 0.5548285789060455, | |
| "grad_norm": 6.4499158810515755, | |
| "learning_rate": 1.8145759520503357e-06, | |
| "logits/chosen": -2.4637808799743652, | |
| "logits/rejected": -2.2752346992492676, | |
| "logps/chosen": -246.92269897460938, | |
| "logps/rejected": -1889.571533203125, | |
| "logps_avg/chosen": -0.7389290928840637, | |
| "logps_avg/rejected": -6.390555381774902, | |
| "loss": 0.6763, | |
| "losses_ref": -0.05337480455636978, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 265, | |
| "u": -2.2342476844787598, | |
| "weight": 0.06287747621536255 | |
| }, | |
| { | |
| "diff_generated": -24.129053115844727, | |
| "epoch": 0.5652970426589898, | |
| "grad_norm": 6.150486891273085, | |
| "learning_rate": 1.803820559763439e-06, | |
| "logits/chosen": -2.463932752609253, | |
| "logits/rejected": -2.262209415435791, | |
| "logps/chosen": -218.674072265625, | |
| "logps/rejected": -2158.11376953125, | |
| "logps_avg/chosen": -0.7358182072639465, | |
| "logps_avg/rejected": -7.238715171813965, | |
| "loss": 0.7092, | |
| "losses_ref": -0.05700932815670967, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 270, | |
| "u": -2.235081911087036, | |
| "weight": 0.0649222731590271 | |
| }, | |
| { | |
| "diff_generated": -22.390344619750977, | |
| "epoch": 0.575765506411934, | |
| "grad_norm": 7.077728369391663, | |
| "learning_rate": 1.7927957584402895e-06, | |
| "logits/chosen": -2.4641366004943848, | |
| "logits/rejected": -2.289757251739502, | |
| "logps/chosen": -230.87442016601562, | |
| "logps/rejected": -1978.302734375, | |
| "logps_avg/chosen": -0.6890340447425842, | |
| "logps_avg/rejected": -6.717103004455566, | |
| "loss": 0.6762, | |
| "losses_ref": -0.05622117966413498, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 275, | |
| "u": -2.217959403991699, | |
| "weight": 0.07241992652416229 | |
| }, | |
| { | |
| "diff_generated": -21.651906967163086, | |
| "epoch": 0.5862339701648783, | |
| "grad_norm": 6.269922997412507, | |
| "learning_rate": 1.78150524316067e-06, | |
| "logits/chosen": -2.512561082839966, | |
| "logits/rejected": -2.3291046619415283, | |
| "logps/chosen": -247.04129028320312, | |
| "logps/rejected": -1997.1549072265625, | |
| "logps_avg/chosen": -0.7235974073410034, | |
| "logps_avg/rejected": -6.495572566986084, | |
| "loss": 0.6702, | |
| "losses_ref": -0.04933195561170578, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 280, | |
| "u": -2.2435684204101562, | |
| "weight": 0.05631055310368538 | |
| }, | |
| { | |
| "diff_generated": -24.519784927368164, | |
| "epoch": 0.5967024339178225, | |
| "grad_norm": 6.4591538424452475, | |
| "learning_rate": 1.7699527980612304e-06, | |
| "logits/chosen": -2.533612012863159, | |
| "logits/rejected": -2.310060501098633, | |
| "logps/chosen": -241.06430053710938, | |
| "logps/rejected": -2117.74609375, | |
| "logps_avg/chosen": -0.7511512041091919, | |
| "logps_avg/rejected": -7.355935573577881, | |
| "loss": 0.7064, | |
| "losses_ref": -0.0406634621322155, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 285, | |
| "u": -2.252286434173584, | |
| "weight": 0.04695131629705429 | |
| }, | |
| { | |
| "diff_generated": -22.960649490356445, | |
| "epoch": 0.6071708976707668, | |
| "grad_norm": 6.2154170319293724, | |
| "learning_rate": 1.758142295067194e-06, | |
| "logits/chosen": -2.508026123046875, | |
| "logits/rejected": -2.2768871784210205, | |
| "logps/chosen": -256.1479797363281, | |
| "logps/rejected": -2004.0556640625, | |
| "logps_avg/chosen": -0.7584555745124817, | |
| "logps_avg/rejected": -6.888195037841797, | |
| "loss": 0.6642, | |
| "losses_ref": -0.05948421359062195, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 290, | |
| "u": -2.2216153144836426, | |
| "weight": 0.07435683906078339 | |
| }, | |
| { | |
| "diff_generated": -23.191375732421875, | |
| "epoch": 0.6176393614237111, | |
| "grad_norm": 6.58174772631908, | |
| "learning_rate": 1.7460776925946416e-06, | |
| "logits/chosen": -2.5151877403259277, | |
| "logits/rejected": -2.297478199005127, | |
| "logps/chosen": -233.7965087890625, | |
| "logps/rejected": -2135.15673828125, | |
| "logps_avg/chosen": -0.7887166738510132, | |
| "logps_avg/rejected": -6.957413673400879, | |
| "loss": 0.6755, | |
| "losses_ref": -0.055867087095975876, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 295, | |
| "u": -2.237914562225342, | |
| "weight": 0.062143467366695404 | |
| }, | |
| { | |
| "diff_generated": -24.709823608398438, | |
| "epoch": 0.6281078251766553, | |
| "grad_norm": 7.437442244122165, | |
| "learning_rate": 1.7337630342238039e-06, | |
| "logits/chosen": -2.525470018386841, | |
| "logits/rejected": -2.3166513442993164, | |
| "logps/chosen": -229.94558715820312, | |
| "logps/rejected": -2169.215576171875, | |
| "logps_avg/chosen": -0.7630201578140259, | |
| "logps_avg/rejected": -7.412947177886963, | |
| "loss": 0.7146, | |
| "losses_ref": -0.0521920807659626, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 300, | |
| "u": -2.238734722137451, | |
| "weight": 0.06009601429104805 | |
| }, | |
| { | |
| "diff_generated": -25.142507553100586, | |
| "epoch": 0.6385762889295996, | |
| "grad_norm": 6.81810983140467, | |
| "learning_rate": 1.7212024473438145e-06, | |
| "logits/chosen": -2.5295021533966064, | |
| "logits/rejected": -2.3437719345092773, | |
| "logps/chosen": -230.28018188476562, | |
| "logps/rejected": -2279.5810546875, | |
| "logps_avg/chosen": -0.6913032531738281, | |
| "logps_avg/rejected": -7.54275369644165, | |
| "loss": 0.6605, | |
| "losses_ref": -0.04879279434680939, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 305, | |
| "u": -2.2388291358947754, | |
| "weight": 0.058758098632097244 | |
| }, | |
| { | |
| "diff_generated": -24.345029830932617, | |
| "epoch": 0.6490447526825438, | |
| "grad_norm": 6.09422333137857, | |
| "learning_rate": 1.70840014176937e-06, | |
| "logits/chosen": -2.496091604232788, | |
| "logits/rejected": -2.2605936527252197, | |
| "logps/chosen": -264.0978698730469, | |
| "logps/rejected": -2208.2470703125, | |
| "logps_avg/chosen": -0.7388861179351807, | |
| "logps_avg/rejected": -7.303508758544922, | |
| "loss": 0.6912, | |
| "losses_ref": -0.042303841561079025, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 310, | |
| "u": -2.246720552444458, | |
| "weight": 0.05154282599687576 | |
| }, | |
| { | |
| "diff_generated": -23.305843353271484, | |
| "epoch": 0.6595132164354881, | |
| "grad_norm": 6.009874799920644, | |
| "learning_rate": 1.6953604083297663e-06, | |
| "logits/chosen": -2.5141513347625732, | |
| "logits/rejected": -2.3054990768432617, | |
| "logps/chosen": -241.82406616210938, | |
| "logps/rejected": -2167.42724609375, | |
| "logps_avg/chosen": -0.740818202495575, | |
| "logps_avg/rejected": -6.991753578186035, | |
| "loss": 0.6887, | |
| "losses_ref": -0.0596298985183239, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 315, | |
| "u": -2.2182247638702393, | |
| "weight": 0.07611407339572906 | |
| }, | |
| { | |
| "diff_generated": -27.0042724609375, | |
| "epoch": 0.6699816801884323, | |
| "grad_norm": 5.920473182891855, | |
| "learning_rate": 1.6820876174307821e-06, | |
| "logits/chosen": -2.482053279876709, | |
| "logits/rejected": -2.2886459827423096, | |
| "logps/chosen": -223.24893188476562, | |
| "logps/rejected": -2428.3193359375, | |
| "logps_avg/chosen": -0.7374002933502197, | |
| "logps_avg/rejected": -8.101282119750977, | |
| "loss": 0.6816, | |
| "losses_ref": -0.049068134278059006, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 320, | |
| "u": -2.235114574432373, | |
| "weight": 0.06041133403778076 | |
| }, | |
| { | |
| "diff_generated": -25.161632537841797, | |
| "epoch": 0.6804501439413766, | |
| "grad_norm": 6.759097342452152, | |
| "learning_rate": 1.668586217589889e-06, | |
| "logits/chosen": -2.4576220512390137, | |
| "logits/rejected": -2.2568023204803467, | |
| "logps/chosen": -255.9824676513672, | |
| "logps/rejected": -2272.87451171875, | |
| "logps_avg/chosen": -0.8112290501594543, | |
| "logps_avg/rejected": -7.548490047454834, | |
| "loss": 0.7034, | |
| "losses_ref": -0.04155198484659195, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 325, | |
| "u": -2.2597880363464355, | |
| "weight": 0.04243909567594528 | |
| }, | |
| { | |
| "diff_generated": -26.866863250732422, | |
| "epoch": 0.6909186076943209, | |
| "grad_norm": 5.913181146879915, | |
| "learning_rate": 1.6548607339452852e-06, | |
| "logits/chosen": -2.5034430027008057, | |
| "logits/rejected": -2.2873404026031494, | |
| "logps/chosen": -219.890625, | |
| "logps/rejected": -2450.533203125, | |
| "logps_avg/chosen": -0.7192927598953247, | |
| "logps_avg/rejected": -8.060060501098633, | |
| "loss": 0.679, | |
| "losses_ref": -0.04148325324058533, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 330, | |
| "u": -2.258359432220459, | |
| "weight": 0.04333708435297012 | |
| }, | |
| { | |
| "diff_generated": -26.58041000366211, | |
| "epoch": 0.7013870714472651, | |
| "grad_norm": 6.258862828154151, | |
| "learning_rate": 1.6409157667392455e-06, | |
| "logits/chosen": -2.5029423236846924, | |
| "logits/rejected": -2.2649450302124023, | |
| "logps/chosen": -239.6374969482422, | |
| "logps/rejected": -2410.685302734375, | |
| "logps_avg/chosen": -0.7706997990608215, | |
| "logps_avg/rejected": -7.974122524261475, | |
| "loss": 0.7035, | |
| "losses_ref": -0.05212752893567085, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 335, | |
| "u": -2.2294507026672363, | |
| "weight": 0.06685757637023926 | |
| }, | |
| { | |
| "diff_generated": -30.061986923217773, | |
| "epoch": 0.7118555352002094, | |
| "grad_norm": 7.477206152513725, | |
| "learning_rate": 1.6267559897763027e-06, | |
| "logits/chosen": -2.3795595169067383, | |
| "logits/rejected": -2.18742036819458, | |
| "logps/chosen": -192.0414276123047, | |
| "logps/rejected": -2674.73486328125, | |
| "logps_avg/chosen": -0.7409474849700928, | |
| "logps_avg/rejected": -9.018596649169922, | |
| "loss": 0.6831, | |
| "losses_ref": -0.044330693781375885, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 340, | |
| "u": -2.235874652862549, | |
| "weight": 0.059127964079380035 | |
| }, | |
| { | |
| "diff_generated": -28.720870971679688, | |
| "epoch": 0.7223239989531536, | |
| "grad_norm": 5.9002590426162325, | |
| "learning_rate": 1.6123861488567708e-06, | |
| "logits/chosen": -2.4881465435028076, | |
| "logits/rejected": -2.2146873474121094, | |
| "logps/chosen": -260.3475341796875, | |
| "logps/rejected": -2515.25, | |
| "logps_avg/chosen": -0.7461652755737305, | |
| "logps_avg/rejected": -8.61626148223877, | |
| "loss": 0.6968, | |
| "losses_ref": -0.044901080429553986, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 345, | |
| "u": -2.2523629665374756, | |
| "weight": 0.04855785518884659 | |
| }, | |
| { | |
| "diff_generated": -28.04868507385254, | |
| "epoch": 0.7327924627060979, | |
| "grad_norm": 15.894199978415127, | |
| "learning_rate": 1.5978110601861409e-06, | |
| "logits/chosen": -2.471588611602783, | |
| "logits/rejected": -2.2580113410949707, | |
| "logps/chosen": -255.3411102294922, | |
| "logps/rejected": -2506.482666015625, | |
| "logps_avg/chosen": -0.7827759385108948, | |
| "logps_avg/rejected": -8.414606094360352, | |
| "loss": 0.7362, | |
| "losses_ref": -0.04014447331428528, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 350, | |
| "u": -2.2483315467834473, | |
| "weight": 0.04962104931473732 | |
| }, | |
| { | |
| "diff_generated": -27.11871337890625, | |
| "epoch": 0.7432609264590422, | |
| "grad_norm": 5.4012187487436725, | |
| "learning_rate": 1.5830356087608763e-06, | |
| "logits/chosen": -2.4285144805908203, | |
| "logits/rejected": -2.1649179458618164, | |
| "logps/chosen": -218.6619415283203, | |
| "logps/rejected": -2413.4892578125, | |
| "logps_avg/chosen": -0.7086374163627625, | |
| "logps_avg/rejected": -8.135615348815918, | |
| "loss": 0.7021, | |
| "losses_ref": -0.03781733289361, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 355, | |
| "u": -2.2616829872131348, | |
| "weight": 0.0397658608853817 | |
| }, | |
| { | |
| "diff_generated": -26.739330291748047, | |
| "epoch": 0.7537293902119864, | |
| "grad_norm": 6.5263260405852614, | |
| "learning_rate": 1.5680647467311555e-06, | |
| "logits/chosen": -2.4289963245391846, | |
| "logits/rejected": -2.133953332901001, | |
| "logps/chosen": -247.11563110351562, | |
| "logps/rejected": -2465.95849609375, | |
| "logps_avg/chosen": -0.7823926210403442, | |
| "logps_avg/rejected": -8.02180004119873, | |
| "loss": 0.6853, | |
| "losses_ref": -0.0527551993727684, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 360, | |
| "u": -2.2420668601989746, | |
| "weight": 0.0583949089050293 | |
| }, | |
| { | |
| "diff_generated": -27.283761978149414, | |
| "epoch": 0.7641978539649307, | |
| "grad_norm": 6.979588218526593, | |
| "learning_rate": 1.552903491741107e-06, | |
| "logits/chosen": -2.449387550354004, | |
| "logits/rejected": -2.1368231773376465, | |
| "logps/chosen": -234.6686553955078, | |
| "logps/rejected": -2578.747802734375, | |
| "logps_avg/chosen": -0.740507185459137, | |
| "logps_avg/rejected": -8.185129165649414, | |
| "loss": 0.6824, | |
| "losses_ref": -0.03961649537086487, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 365, | |
| "u": -2.2473196983337402, | |
| "weight": 0.05010756850242615 | |
| }, | |
| { | |
| "diff_generated": -26.48313331604004, | |
| "epoch": 0.7746663177178749, | |
| "grad_norm": 7.697158528726637, | |
| "learning_rate": 1.5375569252470895e-06, | |
| "logits/chosen": -2.5160136222839355, | |
| "logits/rejected": -2.2105443477630615, | |
| "logps/chosen": -270.76727294921875, | |
| "logps/rejected": -2356.61376953125, | |
| "logps_avg/chosen": -0.798203706741333, | |
| "logps_avg/rejected": -7.944940090179443, | |
| "loss": 0.6956, | |
| "losses_ref": -0.03987672179937363, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 370, | |
| "u": -2.259321689605713, | |
| "weight": 0.042373161762952805 | |
| }, | |
| { | |
| "diff_generated": -25.16873550415039, | |
| "epoch": 0.7851347814708192, | |
| "grad_norm": 6.394620991151716, | |
| "learning_rate": 1.5220301908145903e-06, | |
| "logits/chosen": -2.464231491088867, | |
| "logits/rejected": -2.1346030235290527, | |
| "logps/chosen": -240.89230346679688, | |
| "logps/rejected": -2322.256591796875, | |
| "logps_avg/chosen": -0.6929277181625366, | |
| "logps_avg/rejected": -7.55062198638916, | |
| "loss": 0.6924, | |
| "losses_ref": -0.04263712465763092, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 375, | |
| "u": -2.2447123527526855, | |
| "weight": 0.05186506360769272 | |
| }, | |
| { | |
| "diff_generated": -26.598400115966797, | |
| "epoch": 0.7956032452237635, | |
| "grad_norm": 6.833084085030009, | |
| "learning_rate": 1.5063284923943028e-06, | |
| "logits/chosen": -2.4700121879577637, | |
| "logits/rejected": -2.1623213291168213, | |
| "logps/chosen": -255.25228881835938, | |
| "logps/rejected": -2325.41162109375, | |
| "logps_avg/chosen": -0.7505702376365662, | |
| "logps_avg/rejected": -7.97952127456665, | |
| "loss": 0.6914, | |
| "losses_ref": -0.039328016340732574, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 380, | |
| "u": -2.2589406967163086, | |
| "weight": 0.04286640137434006 | |
| }, | |
| { | |
| "diff_generated": -29.339923858642578, | |
| "epoch": 0.8060717089767077, | |
| "grad_norm": 6.446112080414134, | |
| "learning_rate": 1.490457092577968e-06, | |
| "logits/chosen": -2.4463934898376465, | |
| "logits/rejected": -2.0776758193969727, | |
| "logps/chosen": -232.91452026367188, | |
| "logps/rejected": -2714.375244140625, | |
| "logps_avg/chosen": -0.6785185933113098, | |
| "logps_avg/rejected": -8.801977157592773, | |
| "loss": 0.6865, | |
| "losses_ref": -0.04436464607715607, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 385, | |
| "u": -2.247189998626709, | |
| "weight": 0.05228755623102188 | |
| }, | |
| { | |
| "diff_generated": -27.133153915405273, | |
| "epoch": 0.816540172729652, | |
| "grad_norm": 5.888520537518448, | |
| "learning_rate": 1.4744213108345602e-06, | |
| "logits/chosen": -2.5249063968658447, | |
| "logits/rejected": -2.1448757648468018, | |
| "logps/chosen": -258.61212158203125, | |
| "logps/rejected": -2449.294677734375, | |
| "logps_avg/chosen": -0.7527631521224976, | |
| "logps_avg/rejected": -8.139945983886719, | |
| "loss": 0.685, | |
| "losses_ref": -0.0589534267783165, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 390, | |
| "u": -2.2169764041900635, | |
| "weight": 0.0769612193107605 | |
| }, | |
| { | |
| "diff_generated": -27.033132553100586, | |
| "epoch": 0.8270086364825961, | |
| "grad_norm": 6.211789156823427, | |
| "learning_rate": 1.4582265217274103e-06, | |
| "logits/chosen": -2.4122936725616455, | |
| "logits/rejected": -2.095203161239624, | |
| "logps/chosen": -251.5576629638672, | |
| "logps/rejected": -2401.735595703125, | |
| "logps_avg/chosen": -0.7489043474197388, | |
| "logps_avg/rejected": -8.109941482543945, | |
| "loss": 0.6753, | |
| "losses_ref": -0.048131681978702545, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 395, | |
| "u": -2.247305393218994, | |
| "weight": 0.053915899246931076 | |
| }, | |
| { | |
| "diff_generated": -30.035808563232422, | |
| "epoch": 0.8374771002355405, | |
| "grad_norm": 6.698107767192597, | |
| "learning_rate": 1.4418781531128635e-06, | |
| "logits/chosen": -2.486995220184326, | |
| "logits/rejected": -2.131185531616211, | |
| "logps/chosen": -239.08642578125, | |
| "logps/rejected": -2759.15625, | |
| "logps_avg/chosen": -0.7630764245986938, | |
| "logps_avg/rejected": -9.010741233825684, | |
| "loss": 0.6892, | |
| "losses_ref": -0.036631032824516296, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 400, | |
| "u": -2.2631499767303467, | |
| "weight": 0.038820598274469376 | |
| }, | |
| { | |
| "diff_generated": -27.721935272216797, | |
| "epoch": 0.8479455639884846, | |
| "grad_norm": 7.031324917308057, | |
| "learning_rate": 1.4253816843210748e-06, | |
| "logits/chosen": -2.4483680725097656, | |
| "logits/rejected": -2.089618444442749, | |
| "logps/chosen": -249.0079803466797, | |
| "logps/rejected": -2574.352783203125, | |
| "logps_avg/chosen": -0.722091019153595, | |
| "logps_avg/rejected": -8.316580772399902, | |
| "loss": 0.7066, | |
| "losses_ref": -0.043711207807064056, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 405, | |
| "u": -2.2515838146209717, | |
| "weight": 0.048544611781835556 | |
| }, | |
| { | |
| "diff_generated": -30.00594711303711, | |
| "epoch": 0.8584140277414289, | |
| "grad_norm": 5.878873328550679, | |
| "learning_rate": 1.4087426443195547e-06, | |
| "logits/chosen": -2.4377264976501465, | |
| "logits/rejected": -2.0860629081726074, | |
| "logps/chosen": -220.13644409179688, | |
| "logps/rejected": -2700.03369140625, | |
| "logps_avg/chosen": -0.7378045916557312, | |
| "logps_avg/rejected": -9.001784324645996, | |
| "loss": 0.6757, | |
| "losses_ref": -0.032459113746881485, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 410, | |
| "u": -2.2480547428131104, | |
| "weight": 0.04561341553926468 | |
| }, | |
| { | |
| "diff_generated": -27.96181297302246, | |
| "epoch": 0.8688824914943732, | |
| "grad_norm": 6.085121754886306, | |
| "learning_rate": 1.391966609860075e-06, | |
| "logits/chosen": -2.4773359298706055, | |
| "logits/rejected": -2.129520893096924, | |
| "logps/chosen": -239.4454803466797, | |
| "logps/rejected": -2550.92919921875, | |
| "logps_avg/chosen": -0.7163268327713013, | |
| "logps_avg/rejected": -8.388544082641602, | |
| "loss": 0.6864, | |
| "losses_ref": -0.03842215612530708, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 415, | |
| "u": -2.2578535079956055, | |
| "weight": 0.04306939244270325 | |
| }, | |
| { | |
| "diff_generated": -27.701797485351562, | |
| "epoch": 0.8793509552473174, | |
| "grad_norm": 6.898834621323108, | |
| "learning_rate": 1.3750592036095619e-06, | |
| "logits/chosen": -2.4759981632232666, | |
| "logits/rejected": -2.1207737922668457, | |
| "logps/chosen": -255.3009490966797, | |
| "logps/rejected": -2467.61328125, | |
| "logps_avg/chosen": -0.7468316555023193, | |
| "logps_avg/rejected": -8.310539245605469, | |
| "loss": 0.6929, | |
| "losses_ref": -0.050536155700683594, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 420, | |
| "u": -2.2376935482025146, | |
| "weight": 0.05989469215273857 | |
| }, | |
| { | |
| "diff_generated": -27.900798797607422, | |
| "epoch": 0.8898194190002617, | |
| "grad_norm": 7.318402161699278, | |
| "learning_rate": 1.3580260922655984e-06, | |
| "logits/chosen": -2.459564685821533, | |
| "logits/rejected": -2.133777379989624, | |
| "logps/chosen": -232.8207550048828, | |
| "logps/rejected": -2438.7041015625, | |
| "logps_avg/chosen": -0.7522517442703247, | |
| "logps_avg/rejected": -8.370241165161133, | |
| "loss": 0.6907, | |
| "losses_ref": -0.040023092180490494, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 425, | |
| "u": -2.2584662437438965, | |
| "weight": 0.043312422931194305 | |
| }, | |
| { | |
| "diff_generated": -29.812658309936523, | |
| "epoch": 0.9002878827532059, | |
| "grad_norm": 6.38418063766252, | |
| "learning_rate": 1.3408729846571713e-06, | |
| "logits/chosen": -2.4594979286193848, | |
| "logits/rejected": -2.071135997772217, | |
| "logps/chosen": -280.634521484375, | |
| "logps/rejected": -2652.205322265625, | |
| "logps_avg/chosen": -0.7122408747673035, | |
| "logps_avg/rejected": -8.943798065185547, | |
| "loss": 0.6859, | |
| "losses_ref": -0.03510651737451553, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 430, | |
| "u": -2.2575087547302246, | |
| "weight": 0.04237521067261696 | |
| }, | |
| { | |
| "diff_generated": -25.203630447387695, | |
| "epoch": 0.9107563465061502, | |
| "grad_norm": 6.588604544150575, | |
| "learning_rate": 1.3236056298312956e-06, | |
| "logits/chosen": -2.481071949005127, | |
| "logits/rejected": -2.1608479022979736, | |
| "logps/chosen": -234.13027954101562, | |
| "logps/rejected": -2276.569580078125, | |
| "logps_avg/chosen": -0.7077358365058899, | |
| "logps_avg/rejected": -7.561089992523193, | |
| "loss": 0.6722, | |
| "losses_ref": -0.04718080908060074, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 435, | |
| "u": -2.249262809753418, | |
| "weight": 0.05173084884881973 | |
| }, | |
| { | |
| "diff_generated": -23.8907470703125, | |
| "epoch": 0.9212248102590945, | |
| "grad_norm": 6.857956310477159, | |
| "learning_rate": 1.3062298151261591e-06, | |
| "logits/chosen": -2.5335617065429688, | |
| "logits/rejected": -2.219560146331787, | |
| "logps/chosen": -250.57705688476562, | |
| "logps/rejected": -2190.95947265625, | |
| "logps_avg/chosen": -0.6971117854118347, | |
| "logps_avg/rejected": -7.167223930358887, | |
| "loss": 0.6666, | |
| "losses_ref": -0.04138738289475441, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 440, | |
| "u": -2.250927686691284, | |
| "weight": 0.04870566353201866 | |
| }, | |
| { | |
| "diff_generated": -24.81663703918457, | |
| "epoch": 0.9316932740120387, | |
| "grad_norm": 7.035268937333438, | |
| "learning_rate": 1.2887513642314372e-06, | |
| "logits/chosen": -2.466610908508301, | |
| "logits/rejected": -2.1418159008026123, | |
| "logps/chosen": -234.072021484375, | |
| "logps/rejected": -2254.32177734375, | |
| "logps_avg/chosen": -0.7226396203041077, | |
| "logps_avg/rejected": -7.4449920654296875, | |
| "loss": 0.6772, | |
| "losses_ref": -0.02925349771976471, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 445, | |
| "u": -2.261955738067627, | |
| "weight": 0.036579299718141556 | |
| }, | |
| { | |
| "diff_generated": -23.858409881591797, | |
| "epoch": 0.942161737764983, | |
| "grad_norm": 5.8496221029871895, | |
| "learning_rate": 1.271176135236417e-06, | |
| "logits/chosen": -2.5474791526794434, | |
| "logits/rejected": -2.2467288970947266, | |
| "logps/chosen": -259.63043212890625, | |
| "logps/rejected": -2068.02978515625, | |
| "logps_avg/chosen": -0.7590965032577515, | |
| "logps_avg/rejected": -7.157523155212402, | |
| "loss": 0.6926, | |
| "losses_ref": -0.04666949436068535, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 450, | |
| "u": -2.24082612991333, | |
| "weight": 0.057170577347278595 | |
| }, | |
| { | |
| "diff_generated": -21.257368087768555, | |
| "epoch": 0.9526302015179272, | |
| "grad_norm": 9.579263194990599, | |
| "learning_rate": 1.2535100186666e-06, | |
| "logits/chosen": -2.5334389209747314, | |
| "logits/rejected": -2.2800872325897217, | |
| "logps/chosen": -258.4393615722656, | |
| "logps/rejected": -1949.274658203125, | |
| "logps_avg/chosen": -0.7667442560195923, | |
| "logps_avg/rejected": -6.377211093902588, | |
| "loss": 0.7272, | |
| "losses_ref": -0.04685154929757118, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 455, | |
| "u": -2.239955186843872, | |
| "weight": 0.05661741644144058 | |
| }, | |
| { | |
| "diff_generated": -21.260351181030273, | |
| "epoch": 0.9630986652708715, | |
| "grad_norm": 7.19097418251884, | |
| "learning_rate": 1.2357589355094273e-06, | |
| "logits/chosen": -2.5235114097595215, | |
| "logits/rejected": -2.2688846588134766, | |
| "logps/chosen": -274.0472106933594, | |
| "logps/rejected": -1854.4193115234375, | |
| "logps_avg/chosen": -0.7401561141014099, | |
| "logps_avg/rejected": -6.378105163574219, | |
| "loss": 0.6996, | |
| "losses_ref": -0.04187412187457085, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 460, | |
| "u": -2.255484104156494, | |
| "weight": 0.04560910537838936 | |
| }, | |
| { | |
| "diff_generated": -20.870580673217773, | |
| "epoch": 0.9735671290238157, | |
| "grad_norm": 5.873029901097039, | |
| "learning_rate": 1.2179288352297982e-06, | |
| "logits/chosen": -2.5459725856781006, | |
| "logits/rejected": -2.300191879272461, | |
| "logps/chosen": -233.07363891601562, | |
| "logps/rejected": -1780.218505859375, | |
| "logps_avg/chosen": -0.676838755607605, | |
| "logps_avg/rejected": -6.26117467880249, | |
| "loss": 0.701, | |
| "losses_ref": -0.035965751856565475, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 465, | |
| "u": -2.2623190879821777, | |
| "weight": 0.03852839767932892 | |
| }, | |
| { | |
| "diff_generated": -20.314434051513672, | |
| "epoch": 0.98403559277676, | |
| "grad_norm": 6.047640955364439, | |
| "learning_rate": 1.2000256937760445e-06, | |
| "logits/chosen": -2.478569746017456, | |
| "logits/rejected": -2.2165324687957764, | |
| "logps/chosen": -241.59115600585938, | |
| "logps/rejected": -1793.3131103515625, | |
| "logps_avg/chosen": -0.7300271987915039, | |
| "logps_avg/rejected": -6.094330787658691, | |
| "loss": 0.6661, | |
| "losses_ref": -0.03309565782546997, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 470, | |
| "u": -2.2623355388641357, | |
| "weight": 0.03777972236275673 | |
| }, | |
| { | |
| "diff_generated": -20.79926109313965, | |
| "epoch": 0.9945040565297043, | |
| "grad_norm": 8.164412498048108, | |
| "learning_rate": 1.1820555115770255e-06, | |
| "logits/chosen": -2.5342564582824707, | |
| "logits/rejected": -2.2890594005584717, | |
| "logps/chosen": -230.3572235107422, | |
| "logps/rejected": -1833.0390625, | |
| "logps_avg/chosen": -0.751907467842102, | |
| "logps_avg/rejected": -6.239778995513916, | |
| "loss": 0.6895, | |
| "losses_ref": -0.03975466638803482, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 475, | |
| "u": -2.2550594806671143, | |
| "weight": 0.04479961842298508 | |
| }, | |
| { | |
| "diff_generated": -20.89034080505371, | |
| "epoch": 1.0049725202826485, | |
| "grad_norm": 9.91162629957212, | |
| "learning_rate": 1.1640243115310217e-06, | |
| "logits/chosen": -2.515481948852539, | |
| "logits/rejected": -2.238800525665283, | |
| "logps/chosen": -238.7968292236328, | |
| "logps/rejected": -1904.6226806640625, | |
| "logps_avg/chosen": -0.730613112449646, | |
| "logps_avg/rejected": -6.2671027183532715, | |
| "loss": 0.6185, | |
| "losses_ref": -0.0886848121881485, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 480, | |
| "u": -3.230264186859131, | |
| "weight": 0.0876741111278534 | |
| }, | |
| { | |
| "diff_generated": -22.67684555053711, | |
| "epoch": 1.0154409840355927, | |
| "grad_norm": 8.949481189927978, | |
| "learning_rate": 1.1459381369870972e-06, | |
| "logits/chosen": -2.4899425506591797, | |
| "logits/rejected": -2.1274173259735107, | |
| "logps/chosen": -239.3141632080078, | |
| "logps/rejected": -2098.4287109375, | |
| "logps_avg/chosen": -0.6295738816261292, | |
| "logps_avg/rejected": -6.8030548095703125, | |
| "loss": 0.5199, | |
| "losses_ref": -0.09897326678037643, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 485, | |
| "u": -4.378706932067871, | |
| "weight": 0.08350441604852676 | |
| }, | |
| { | |
| "diff_generated": -24.163660049438477, | |
| "epoch": 1.025909447788537, | |
| "grad_norm": 8.708694233875605, | |
| "learning_rate": 1.1278030497196046e-06, | |
| "logits/chosen": -2.448932409286499, | |
| "logits/rejected": -2.0961108207702637, | |
| "logps/chosen": -197.19461059570312, | |
| "logps/rejected": -2133.96630859375, | |
| "logps_avg/chosen": -0.5785419940948486, | |
| "logps_avg/rejected": -7.2490973472595215, | |
| "loss": 0.5111, | |
| "losses_ref": -0.12583398818969727, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 490, | |
| "u": -4.304468631744385, | |
| "weight": 0.12433832883834839 | |
| }, | |
| { | |
| "diff_generated": -25.089040756225586, | |
| "epoch": 1.0363779115414813, | |
| "grad_norm": 8.538618246529412, | |
| "learning_rate": 1.1096251278965172e-06, | |
| "logits/chosen": -2.4840457439422607, | |
| "logits/rejected": -2.1427814960479736, | |
| "logps/chosen": -202.72528076171875, | |
| "logps/rejected": -2115.415283203125, | |
| "logps_avg/chosen": -0.5992251038551331, | |
| "logps_avg/rejected": -7.526711940765381, | |
| "loss": 0.4987, | |
| "losses_ref": -0.10639525949954987, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 495, | |
| "u": -4.343268394470215, | |
| "weight": 0.10977953672409058 | |
| }, | |
| { | |
| "diff_generated": -24.132022857666016, | |
| "epoch": 1.0468463752944255, | |
| "grad_norm": 7.67811116418592, | |
| "learning_rate": 1.0914104640422679e-06, | |
| "logits/chosen": -2.4932920932769775, | |
| "logits/rejected": -2.1089999675750732, | |
| "logps/chosen": -199.10342407226562, | |
| "logps/rejected": -2176.26318359375, | |
| "logps_avg/chosen": -0.6183401346206665, | |
| "logps_avg/rejected": -7.2396063804626465, | |
| "loss": 0.5202, | |
| "losses_ref": -0.12012694031000137, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 500, | |
| "u": -4.302992820739746, | |
| "weight": 0.11936072260141373 | |
| }, | |
| { | |
| "diff_generated": -23.451740264892578, | |
| "epoch": 1.05731483904737, | |
| "grad_norm": 20.37435210253164, | |
| "learning_rate": 1.0731651629957721e-06, | |
| "logits/chosen": -2.42221736907959, | |
| "logits/rejected": -2.1496148109436035, | |
| "logps/chosen": -229.11068725585938, | |
| "logps/rejected": -2094.52197265625, | |
| "logps_avg/chosen": -0.6533752679824829, | |
| "logps_avg/rejected": -7.035521507263184, | |
| "loss": 0.5184, | |
| "losses_ref": -0.1230870932340622, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 505, | |
| "u": -4.369751930236816, | |
| "weight": 0.1066075786948204 | |
| }, | |
| { | |
| "diff_generated": -22.74098777770996, | |
| "epoch": 1.067783302800314, | |
| "grad_norm": 7.268444145722818, | |
| "learning_rate": 1.0548953398643274e-06, | |
| "logits/chosen": -2.4076297283172607, | |
| "logits/rejected": -2.0819380283355713, | |
| "logps/chosen": -233.77938842773438, | |
| "logps/rejected": -2035.225830078125, | |
| "logps_avg/chosen": -0.6575011014938354, | |
| "logps_avg/rejected": -6.822296142578125, | |
| "loss": 0.4947, | |
| "losses_ref": -0.14097091555595398, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 510, | |
| "u": -4.253186225891113, | |
| "weight": 0.1333218514919281 | |
| }, | |
| { | |
| "diff_generated": -25.923725128173828, | |
| "epoch": 1.0782517665532583, | |
| "grad_norm": 8.062661700192072, | |
| "learning_rate": 1.0366071179740706e-06, | |
| "logits/chosen": -2.4787120819091797, | |
| "logits/rejected": -2.12414288520813, | |
| "logps/chosen": -257.2312927246094, | |
| "logps/rejected": -2302.900634765625, | |
| "logps_avg/chosen": -0.6627689003944397, | |
| "logps_avg/rejected": -7.777116298675537, | |
| "loss": 0.5085, | |
| "losses_ref": -0.10705102980136871, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 515, | |
| "u": -4.345104217529297, | |
| "weight": 0.10459395498037338 | |
| }, | |
| { | |
| "diff_generated": -27.071746826171875, | |
| "epoch": 1.0887202303062025, | |
| "grad_norm": 7.3598703596101975, | |
| "learning_rate": 1.0183066268176775e-06, | |
| "logits/chosen": -2.436248779296875, | |
| "logits/rejected": -2.075679063796997, | |
| "logps/chosen": -244.1257781982422, | |
| "logps/rejected": -2375.113525390625, | |
| "logps_avg/chosen": -0.6157761812210083, | |
| "logps_avg/rejected": -8.1215238571167, | |
| "loss": 0.5683, | |
| "losses_ref": -0.08251279592514038, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 520, | |
| "u": -4.396883010864258, | |
| "weight": 0.06908340752124786 | |
| }, | |
| { | |
| "diff_generated": -26.481449127197266, | |
| "epoch": 1.0991886940591469, | |
| "grad_norm": 8.892060607648993, | |
| "learning_rate": 1e-06, | |
| "logits/chosen": -2.4646589756011963, | |
| "logits/rejected": -2.096703290939331, | |
| "logps/chosen": -226.17453002929688, | |
| "logps/rejected": -2343.119384765625, | |
| "logps_avg/chosen": -0.6375609040260315, | |
| "logps_avg/rejected": -7.944435119628906, | |
| "loss": 0.5652, | |
| "losses_ref": -0.08028392493724823, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 525, | |
| "u": -4.399170398712158, | |
| "weight": 0.07661790400743484 | |
| }, | |
| { | |
| "diff_generated": -25.77886962890625, | |
| "epoch": 1.109657157812091, | |
| "grad_norm": 11.93280848823974, | |
| "learning_rate": 9.816933731823228e-07, | |
| "logits/chosen": -2.4755985736846924, | |
| "logits/rejected": -2.1236746311187744, | |
| "logps/chosen": -219.5588836669922, | |
| "logps/rejected": -2258.547119140625, | |
| "logps_avg/chosen": -0.6109720468521118, | |
| "logps_avg/rejected": -7.733660697937012, | |
| "loss": 0.5032, | |
| "losses_ref": -0.09919899702072144, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 530, | |
| "u": -4.327098846435547, | |
| "weight": 0.0968068465590477 | |
| }, | |
| { | |
| "diff_generated": -26.962757110595703, | |
| "epoch": 1.1201256215650353, | |
| "grad_norm": 11.74024044453861, | |
| "learning_rate": 9.633928820259293e-07, | |
| "logits/chosen": -2.382981777191162, | |
| "logits/rejected": -1.9988247156143188, | |
| "logps/chosen": -198.56578063964844, | |
| "logps/rejected": -2398.09326171875, | |
| "logps_avg/chosen": -0.6096338033676147, | |
| "logps_avg/rejected": -8.088827133178711, | |
| "loss": 0.5305, | |
| "losses_ref": -0.06856809556484222, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 535, | |
| "u": -4.411980152130127, | |
| "weight": 0.06424126774072647 | |
| }, | |
| { | |
| "diff_generated": -26.22715187072754, | |
| "epoch": 1.1305940853179797, | |
| "grad_norm": 11.054487118285914, | |
| "learning_rate": 9.451046601356725e-07, | |
| "logits/chosen": -2.4410181045532227, | |
| "logits/rejected": -2.095543146133423, | |
| "logps/chosen": -207.6184844970703, | |
| "logps/rejected": -2253.38623046875, | |
| "logps_avg/chosen": -0.6336568593978882, | |
| "logps_avg/rejected": -7.868145942687988, | |
| "loss": 0.5357, | |
| "losses_ref": -0.0955720990896225, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 540, | |
| "u": -4.348645210266113, | |
| "weight": 0.09204810112714767 | |
| }, | |
| { | |
| "diff_generated": -25.266141891479492, | |
| "epoch": 1.1410625490709239, | |
| "grad_norm": 8.805909515635294, | |
| "learning_rate": 9.268348370042281e-07, | |
| "logits/chosen": -2.4485838413238525, | |
| "logits/rejected": -2.1053905487060547, | |
| "logps/chosen": -216.48910522460938, | |
| "logps/rejected": -2250.44775390625, | |
| "logps_avg/chosen": -0.588961124420166, | |
| "logps_avg/rejected": -7.579843044281006, | |
| "loss": 0.5159, | |
| "losses_ref": -0.09172032028436661, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 545, | |
| "u": -4.326230525970459, | |
| "weight": 0.09086887538433075 | |
| }, | |
| { | |
| "diff_generated": -26.917110443115234, | |
| "epoch": 1.151531012823868, | |
| "grad_norm": 10.666064793677686, | |
| "learning_rate": 9.085895359577323e-07, | |
| "logits/chosen": -2.404174566268921, | |
| "logits/rejected": -2.037463665008545, | |
| "logps/chosen": -205.3460235595703, | |
| "logps/rejected": -2429.36279296875, | |
| "logps_avg/chosen": -0.5989923477172852, | |
| "logps_avg/rejected": -8.07513427734375, | |
| "loss": 0.5332, | |
| "losses_ref": -0.06065789982676506, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 550, | |
| "u": -4.387241363525391, | |
| "weight": 0.0559367910027504 | |
| }, | |
| { | |
| "diff_generated": -25.942188262939453, | |
| "epoch": 1.1619994765768125, | |
| "grad_norm": 10.199822581929254, | |
| "learning_rate": 8.903748721034826e-07, | |
| "logits/chosen": -2.432077407836914, | |
| "logits/rejected": -2.0631113052368164, | |
| "logps/chosen": -209.88076782226562, | |
| "logps/rejected": -2297.24853515625, | |
| "logps_avg/chosen": -0.6222396492958069, | |
| "logps_avg/rejected": -7.782655239105225, | |
| "loss": 0.5436, | |
| "losses_ref": -0.053764212876558304, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 555, | |
| "u": -4.413111209869385, | |
| "weight": 0.05243021994829178 | |
| }, | |
| { | |
| "diff_generated": -26.842655181884766, | |
| "epoch": 1.1724679403297567, | |
| "grad_norm": 9.055623269790141, | |
| "learning_rate": 8.721969502803953e-07, | |
| "logits/chosen": -2.4761881828308105, | |
| "logits/rejected": -2.037745952606201, | |
| "logps/chosen": -228.0619659423828, | |
| "logps/rejected": -2454.422607421875, | |
| "logps_avg/chosen": -0.6156254410743713, | |
| "logps_avg/rejected": -8.052797317504883, | |
| "loss": 0.4938, | |
| "losses_ref": -0.06194459646940231, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 560, | |
| "u": -4.417675018310547, | |
| "weight": 0.05182374641299248 | |
| }, | |
| { | |
| "diff_generated": -25.78971290588379, | |
| "epoch": 1.1829364040827008, | |
| "grad_norm": 11.397081928275703, | |
| "learning_rate": 8.540618630129027e-07, | |
| "logits/chosen": -2.4368996620178223, | |
| "logits/rejected": -2.0613627433776855, | |
| "logps/chosen": -244.33059692382812, | |
| "logps/rejected": -2314.3056640625, | |
| "logps_avg/chosen": -0.6685888171195984, | |
| "logps_avg/rejected": -7.736914157867432, | |
| "loss": 0.5495, | |
| "losses_ref": -0.07071459293365479, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 565, | |
| "u": -4.390778064727783, | |
| "weight": 0.06269918382167816 | |
| }, | |
| { | |
| "diff_generated": -26.82694435119629, | |
| "epoch": 1.193404867835645, | |
| "grad_norm": 9.221832000440747, | |
| "learning_rate": 8.359756884689783e-07, | |
| "logits/chosen": -2.497908115386963, | |
| "logits/rejected": -2.125258207321167, | |
| "logps/chosen": -215.4803009033203, | |
| "logps/rejected": -2407.225830078125, | |
| "logps_avg/chosen": -0.6236811876296997, | |
| "logps_avg/rejected": -8.048083305358887, | |
| "loss": 0.5244, | |
| "losses_ref": -0.08507435768842697, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 570, | |
| "u": -4.405519485473633, | |
| "weight": 0.07415871322154999 | |
| }, | |
| { | |
| "diff_generated": -27.44614601135254, | |
| "epoch": 1.2038733315885894, | |
| "grad_norm": 14.484772212758768, | |
| "learning_rate": 8.179444884229744e-07, | |
| "logits/chosen": -2.415398597717285, | |
| "logits/rejected": -2.0458593368530273, | |
| "logps/chosen": -224.60482788085938, | |
| "logps/rejected": -2476.796142578125, | |
| "logps_avg/chosen": -0.6788522601127625, | |
| "logps_avg/rejected": -8.233844757080078, | |
| "loss": 0.5625, | |
| "losses_ref": -0.05934012681245804, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 575, | |
| "u": -4.461823463439941, | |
| "weight": 0.044574279338121414 | |
| }, | |
| { | |
| "diff_generated": -29.135217666625977, | |
| "epoch": 1.2143417953415336, | |
| "grad_norm": 18.01394064023352, | |
| "learning_rate": 7.999743062239557e-07, | |
| "logits/chosen": -2.4544944763183594, | |
| "logits/rejected": -2.104241371154785, | |
| "logps/chosen": -210.87893676757812, | |
| "logps/rejected": -2643.50390625, | |
| "logps_avg/chosen": -0.6716314554214478, | |
| "logps_avg/rejected": -8.740565299987793, | |
| "loss": 0.5555, | |
| "losses_ref": -0.056417226791381836, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 580, | |
| "u": -4.44529914855957, | |
| "weight": 0.04976346716284752 | |
| }, | |
| { | |
| "diff_generated": -27.484622955322266, | |
| "epoch": 1.2248102590944778, | |
| "grad_norm": 10.29630717051048, | |
| "learning_rate": 7.820711647702017e-07, | |
| "logits/chosen": -2.4541475772857666, | |
| "logits/rejected": -2.0904035568237305, | |
| "logps/chosen": -202.5820770263672, | |
| "logps/rejected": -2515.11962890625, | |
| "logps_avg/chosen": -0.5754384994506836, | |
| "logps_avg/rejected": -8.245387077331543, | |
| "loss": 0.5346, | |
| "losses_ref": -0.08221448957920074, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 585, | |
| "u": -4.365923881530762, | |
| "weight": 0.07960718125104904 | |
| }, | |
| { | |
| "diff_generated": -26.950695037841797, | |
| "epoch": 1.235278722847422, | |
| "grad_norm": 10.223108898541343, | |
| "learning_rate": 7.642410644905726e-07, | |
| "logits/chosen": -2.3840575218200684, | |
| "logits/rejected": -2.0544769763946533, | |
| "logps/chosen": -205.935546875, | |
| "logps/rejected": -2364.6396484375, | |
| "logps_avg/chosen": -0.5895050764083862, | |
| "logps_avg/rejected": -8.08520793914795, | |
| "loss": 0.5503, | |
| "losses_ref": -0.10383725166320801, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 590, | |
| "u": -4.365530490875244, | |
| "weight": 0.09789486229419708 | |
| }, | |
| { | |
| "diff_generated": -29.25247573852539, | |
| "epoch": 1.2457471866003664, | |
| "grad_norm": 12.09100466478698, | |
| "learning_rate": 7.464899813334e-07, | |
| "logits/chosen": -2.3943965435028076, | |
| "logits/rejected": -2.067821979522705, | |
| "logps/chosen": -215.44094848632812, | |
| "logps/rejected": -2522.196533203125, | |
| "logps_avg/chosen": -0.6099680662155151, | |
| "logps_avg/rejected": -8.77574348449707, | |
| "loss": 0.5325, | |
| "losses_ref": -0.07746943831443787, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 595, | |
| "u": -4.356167793273926, | |
| "weight": 0.07601340860128403 | |
| }, | |
| { | |
| "diff_generated": -27.34578514099121, | |
| "epoch": 1.2562156503533106, | |
| "grad_norm": 8.052346731222642, | |
| "learning_rate": 7.288238647635829e-07, | |
| "logits/chosen": -2.435148239135742, | |
| "logits/rejected": -2.1030170917510986, | |
| "logps/chosen": -226.7269744873047, | |
| "logps/rejected": -2427.451171875, | |
| "logps_avg/chosen": -0.6252392530441284, | |
| "logps_avg/rejected": -8.2037353515625, | |
| "loss": 0.5356, | |
| "losses_ref": -0.06464961916208267, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 600, | |
| "u": -4.406424522399902, | |
| "weight": 0.061459980905056 | |
| }, | |
| { | |
| "diff_generated": -27.118465423583984, | |
| "epoch": 1.2666841141062548, | |
| "grad_norm": 11.655006277757288, | |
| "learning_rate": 7.112486357685631e-07, | |
| "logits/chosen": -2.450383424758911, | |
| "logits/rejected": -2.0887584686279297, | |
| "logps/chosen": -222.7769012451172, | |
| "logps/rejected": -2357.30712890625, | |
| "logps_avg/chosen": -0.6189793348312378, | |
| "logps_avg/rejected": -8.135540008544922, | |
| "loss": 0.5517, | |
| "losses_ref": -0.08965682238340378, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 605, | |
| "u": -4.366550445556641, | |
| "weight": 0.09028217941522598 | |
| }, | |
| { | |
| "diff_generated": -27.826339721679688, | |
| "epoch": 1.2771525778591992, | |
| "grad_norm": 8.355569379147827, | |
| "learning_rate": 6.937701848738407e-07, | |
| "logits/chosen": -2.4444997310638428, | |
| "logits/rejected": -2.103099822998047, | |
| "logps/chosen": -200.1586151123047, | |
| "logps/rejected": -2441.192138671875, | |
| "logps_avg/chosen": -0.5492798089981079, | |
| "logps_avg/rejected": -8.347902297973633, | |
| "loss": 0.5273, | |
| "losses_ref": -0.05201203376054764, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 610, | |
| "u": -4.470887660980225, | |
| "weight": 0.04102148860692978 | |
| }, | |
| { | |
| "diff_generated": -27.140499114990234, | |
| "epoch": 1.2876210416121434, | |
| "grad_norm": 19.07484346081228, | |
| "learning_rate": 6.763943701687045e-07, | |
| "logits/chosen": -2.4840033054351807, | |
| "logits/rejected": -2.0714080333709717, | |
| "logps/chosen": -237.1542510986328, | |
| "logps/rejected": -2492.620849609375, | |
| "logps_avg/chosen": -0.6195243000984192, | |
| "logps_avg/rejected": -8.142149925231934, | |
| "loss": 0.5249, | |
| "losses_ref": -0.07448837906122208, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 615, | |
| "u": -4.392305374145508, | |
| "weight": 0.06138737127184868 | |
| }, | |
| { | |
| "diff_generated": -29.331090927124023, | |
| "epoch": 1.2980895053650876, | |
| "grad_norm": 14.350296949575641, | |
| "learning_rate": 6.591270153428288e-07, | |
| "logits/chosen": -2.5314509868621826, | |
| "logits/rejected": -2.1232359409332275, | |
| "logps/chosen": -230.3607940673828, | |
| "logps/rejected": -2496.131103515625, | |
| "logps_avg/chosen": -0.6086186170578003, | |
| "logps_avg/rejected": -8.799327850341797, | |
| "loss": 0.5301, | |
| "losses_ref": -0.05894411355257034, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 620, | |
| "u": -4.37734842300415, | |
| "weight": 0.053639549762010574 | |
| }, | |
| { | |
| "diff_generated": -26.76749038696289, | |
| "epoch": 1.308557969118032, | |
| "grad_norm": 8.772096019129755, | |
| "learning_rate": 6.419739077344016e-07, | |
| "logits/chosen": -2.517256259918213, | |
| "logits/rejected": -2.158301591873169, | |
| "logps/chosen": -236.55648803710938, | |
| "logps/rejected": -2372.91796875, | |
| "logps_avg/chosen": -0.6213998794555664, | |
| "logps_avg/rejected": -8.030247688293457, | |
| "loss": 0.544, | |
| "losses_ref": -0.09482914954423904, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 625, | |
| "u": -4.376565456390381, | |
| "weight": 0.07662535458803177 | |
| }, | |
| { | |
| "diff_generated": -28.416824340820312, | |
| "epoch": 1.3190264328709762, | |
| "grad_norm": 7.8466631670725935, | |
| "learning_rate": 6.24940796390438e-07, | |
| "logits/chosen": -2.4629857540130615, | |
| "logits/rejected": -2.0768308639526367, | |
| "logps/chosen": -214.29360961914062, | |
| "logps/rejected": -2455.93115234375, | |
| "logps_avg/chosen": -0.6123236417770386, | |
| "logps_avg/rejected": -8.52504825592041, | |
| "loss": 0.5392, | |
| "losses_ref": -0.059877872467041016, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 630, | |
| "u": -4.431421756744385, | |
| "weight": 0.043088506907224655 | |
| }, | |
| { | |
| "diff_generated": -29.8402099609375, | |
| "epoch": 1.3294948966239204, | |
| "grad_norm": 20.160929381759352, | |
| "learning_rate": 6.08033390139925e-07, | |
| "logits/chosen": -2.4479854106903076, | |
| "logits/rejected": -2.0140042304992676, | |
| "logps/chosen": -228.12948608398438, | |
| "logps/rejected": -2645.977294921875, | |
| "logps_avg/chosen": -0.6280118227005005, | |
| "logps_avg/rejected": -8.95206356048584, | |
| "loss": 0.5647, | |
| "losses_ref": -0.0805547907948494, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 635, | |
| "u": -4.419759750366211, | |
| "weight": 0.06746160984039307 | |
| }, | |
| { | |
| "diff_generated": -29.193140029907227, | |
| "epoch": 1.3399633603768648, | |
| "grad_norm": 17.984653220174852, | |
| "learning_rate": 5.912573556804452e-07, | |
| "logits/chosen": -2.4721744060516357, | |
| "logits/rejected": -2.0706074237823486, | |
| "logps/chosen": -219.49658203125, | |
| "logps/rejected": -2600.13525390625, | |
| "logps_avg/chosen": -0.5888947248458862, | |
| "logps_avg/rejected": -8.757942199707031, | |
| "loss": 0.5708, | |
| "losses_ref": -0.06751363724470139, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 640, | |
| "u": -4.354660511016846, | |
| "weight": 0.08399678766727448 | |
| }, | |
| { | |
| "diff_generated": -29.59097671508789, | |
| "epoch": 1.350431824129809, | |
| "grad_norm": 8.832363301034992, | |
| "learning_rate": 5.746183156789252e-07, | |
| "logits/chosen": -2.522441864013672, | |
| "logits/rejected": -2.069122076034546, | |
| "logps/chosen": -234.3195343017578, | |
| "logps/rejected": -2680.282470703125, | |
| "logps_avg/chosen": -0.6104280352592468, | |
| "logps_avg/rejected": -8.877291679382324, | |
| "loss": 0.5457, | |
| "losses_ref": -0.05418990179896355, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 645, | |
| "u": -4.4123215675354, | |
| "weight": 0.058007679879665375 | |
| }, | |
| { | |
| "diff_generated": -28.265172958374023, | |
| "epoch": 1.3609002878827532, | |
| "grad_norm": 12.218786161167232, | |
| "learning_rate": 5.581218468871365e-07, | |
| "logits/chosen": -2.4173598289489746, | |
| "logits/rejected": -2.0515952110290527, | |
| "logps/chosen": -190.7438507080078, | |
| "logps/rejected": -2539.76953125, | |
| "logps_avg/chosen": -0.5876272320747375, | |
| "logps_avg/rejected": -8.479551315307617, | |
| "loss": 0.5169, | |
| "losses_ref": -0.08093442767858505, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 650, | |
| "u": -4.324867248535156, | |
| "weight": 0.08522786945104599 | |
| }, | |
| { | |
| "diff_generated": -31.353778839111328, | |
| "epoch": 1.3713687516356974, | |
| "grad_norm": 10.11440836146207, | |
| "learning_rate": 5.417734782725896e-07, | |
| "logits/chosen": -2.459190845489502, | |
| "logits/rejected": -2.060859203338623, | |
| "logps/chosen": -211.8318634033203, | |
| "logps/rejected": -2672.73583984375, | |
| "logps_avg/chosen": -0.5790122151374817, | |
| "logps_avg/rejected": -9.406133651733398, | |
| "loss": 0.5603, | |
| "losses_ref": -0.038860172033309937, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 655, | |
| "u": -4.468893051147461, | |
| "weight": 0.03133354336023331 | |
| }, | |
| { | |
| "diff_generated": -28.588571548461914, | |
| "epoch": 1.3818372153886418, | |
| "grad_norm": 12.583696879491457, | |
| "learning_rate": 5.255786891654399e-07, | |
| "logits/chosen": -2.4734246730804443, | |
| "logits/rejected": -2.0776007175445557, | |
| "logps/chosen": -203.22389221191406, | |
| "logps/rejected": -2578.066162109375, | |
| "logps_avg/chosen": -0.6348826289176941, | |
| "logps_avg/rejected": -8.57657241821289, | |
| "loss": 0.5486, | |
| "losses_ref": -0.06403845548629761, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 660, | |
| "u": -4.37404203414917, | |
| "weight": 0.06924913823604584 | |
| }, | |
| { | |
| "diff_generated": -29.78804588317871, | |
| "epoch": 1.392305679141586, | |
| "grad_norm": 13.032538343695713, | |
| "learning_rate": 5.095429074220319e-07, | |
| "logits/chosen": -2.4960551261901855, | |
| "logits/rejected": -2.1090826988220215, | |
| "logps/chosen": -213.1850128173828, | |
| "logps/rejected": -2626.316162109375, | |
| "logps_avg/chosen": -0.6238334774971008, | |
| "logps_avg/rejected": -8.93641471862793, | |
| "loss": 0.5533, | |
| "losses_ref": -0.06042981147766113, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 665, | |
| "u": -4.39785099029541, | |
| "weight": 0.05738676339387894 | |
| }, | |
| { | |
| "diff_generated": -31.395706176757812, | |
| "epoch": 1.4027741428945302, | |
| "grad_norm": 29.282292978403014, | |
| "learning_rate": 4.936715076056974e-07, | |
| "logits/chosen": -2.519998073577881, | |
| "logits/rejected": -2.1003477573394775, | |
| "logps/chosen": -227.49972534179688, | |
| "logps/rejected": -2841.53759765625, | |
| "logps_avg/chosen": -0.6322627067565918, | |
| "logps_avg/rejected": -9.418710708618164, | |
| "loss": 0.545, | |
| "losses_ref": -0.04599471017718315, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 670, | |
| "u": -4.469276428222656, | |
| "weight": 0.033300966024398804 | |
| }, | |
| { | |
| "diff_generated": -32.34383010864258, | |
| "epoch": 1.4132426066474744, | |
| "grad_norm": 21.235357659003228, | |
| "learning_rate": 4.779698091854098e-07, | |
| "logits/chosen": -2.5733542442321777, | |
| "logits/rejected": -2.1177892684936523, | |
| "logps/chosen": -241.3948516845703, | |
| "logps/rejected": -2941.85205078125, | |
| "logps_avg/chosen": -0.634663999080658, | |
| "logps_avg/rejected": -9.70314884185791, | |
| "loss": 0.5578, | |
| "losses_ref": -0.03548940271139145, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 675, | |
| "u": -4.454364776611328, | |
| "weight": 0.025668436661362648 | |
| }, | |
| { | |
| "diff_generated": -29.166423797607422, | |
| "epoch": 1.4237110704004188, | |
| "grad_norm": 9.728306873667183, | |
| "learning_rate": 4.624430747529102e-07, | |
| "logits/chosen": -2.5310111045837402, | |
| "logits/rejected": -2.1089558601379395, | |
| "logps/chosen": -245.45083618164062, | |
| "logps/rejected": -2643.77001953125, | |
| "logps_avg/chosen": -0.6183468699455261, | |
| "logps_avg/rejected": -8.749927520751953, | |
| "loss": 0.5228, | |
| "losses_ref": -0.08980627357959747, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 680, | |
| "u": -4.334953308105469, | |
| "weight": 0.07751224935054779 | |
| }, | |
| { | |
| "diff_generated": -33.88722610473633, | |
| "epoch": 1.434179534153363, | |
| "grad_norm": 14.616844426526761, | |
| "learning_rate": 4.4709650825889277e-07, | |
| "logits/chosen": -2.460334300994873, | |
| "logits/rejected": -2.0326919555664062, | |
| "logps/chosen": -193.82003784179688, | |
| "logps/rejected": -2947.883544921875, | |
| "logps_avg/chosen": -0.5843343138694763, | |
| "logps_avg/rejected": -10.166168212890625, | |
| "loss": 0.5694, | |
| "losses_ref": -0.03547119349241257, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 685, | |
| "u": -4.463663578033447, | |
| "weight": 0.030837317928671837 | |
| }, | |
| { | |
| "diff_generated": -30.6027774810791, | |
| "epoch": 1.4446479979063072, | |
| "grad_norm": 11.081953598678401, | |
| "learning_rate": 4.3193525326884426e-07, | |
| "logits/chosen": -2.5122551918029785, | |
| "logits/rejected": -2.0895779132843018, | |
| "logps/chosen": -238.4690704345703, | |
| "logps/rejected": -2627.096435546875, | |
| "logps_avg/chosen": -0.6726236343383789, | |
| "logps_avg/rejected": -9.180832862854004, | |
| "loss": 0.587, | |
| "losses_ref": -0.05756605789065361, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 690, | |
| "u": -4.38980770111084, | |
| "weight": 0.0510624423623085 | |
| }, | |
| { | |
| "diff_generated": -32.015716552734375, | |
| "epoch": 1.4551164616592516, | |
| "grad_norm": 11.608639050571856, | |
| "learning_rate": 4.1696439123912406e-07, | |
| "logits/chosen": -2.4778366088867188, | |
| "logits/rejected": -2.0454444885253906, | |
| "logps/chosen": -205.8911590576172, | |
| "logps/rejected": -2957.13525390625, | |
| "logps_avg/chosen": -0.6116452217102051, | |
| "logps_avg/rejected": -9.604714393615723, | |
| "loss": 0.5502, | |
| "losses_ref": -0.05736450105905533, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 695, | |
| "u": -4.357982635498047, | |
| "weight": 0.05952075123786926 | |
| }, | |
| { | |
| "diff_generated": -35.234153747558594, | |
| "epoch": 1.4655849254121958, | |
| "grad_norm": 8.17712308208093, | |
| "learning_rate": 4.0218893981385927e-07, | |
| "logits/chosen": -2.485691547393799, | |
| "logits/rejected": -2.046220064163208, | |
| "logps/chosen": -200.62582397460938, | |
| "logps/rejected": -3101.075439453125, | |
| "logps_avg/chosen": -0.5734541416168213, | |
| "logps_avg/rejected": -10.570245742797852, | |
| "loss": 0.5729, | |
| "losses_ref": -0.028310665860772133, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 700, | |
| "u": -4.45696496963501, | |
| "weight": 0.023738497868180275 | |
| }, | |
| { | |
| "diff_generated": -35.26641082763672, | |
| "epoch": 1.47605338916514, | |
| "grad_norm": 16.950355166034456, | |
| "learning_rate": 3.87613851143229e-07, | |
| "logits/chosen": -2.494295597076416, | |
| "logits/rejected": -2.00370717048645, | |
| "logps/chosen": -230.57400512695312, | |
| "logps/rejected": -3109.327392578125, | |
| "logps_avg/chosen": -0.6209388971328735, | |
| "logps_avg/rejected": -10.57992172241211, | |
| "loss": 0.5466, | |
| "losses_ref": -0.0546514168381691, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 705, | |
| "u": -4.406841278076172, | |
| "weight": 0.04961226135492325 | |
| }, | |
| { | |
| "diff_generated": -34.927207946777344, | |
| "epoch": 1.4865218529180844, | |
| "grad_norm": 9.208840009036596, | |
| "learning_rate": 3.7324401022369744e-07, | |
| "logits/chosen": -2.4626827239990234, | |
| "logits/rejected": -1.9565467834472656, | |
| "logps/chosen": -232.802001953125, | |
| "logps/rejected": -3108.4921875, | |
| "logps_avg/chosen": -0.6169513463973999, | |
| "logps_avg/rejected": -10.47816276550293, | |
| "loss": 0.5383, | |
| "losses_ref": -0.051527369767427444, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 710, | |
| "u": -4.410109043121338, | |
| "weight": 0.04789410158991814 | |
| }, | |
| { | |
| "diff_generated": -31.93350601196289, | |
| "epoch": 1.4969903166710286, | |
| "grad_norm": 8.74366239695945, | |
| "learning_rate": 3.5908423326075455e-07, | |
| "logits/chosen": -2.470921039581299, | |
| "logits/rejected": -2.028719425201416, | |
| "logps/chosen": -197.37814331054688, | |
| "logps/rejected": -2799.31396484375, | |
| "logps_avg/chosen": -0.5950369834899902, | |
| "logps_avg/rejected": -9.580052375793457, | |
| "loss": 0.5627, | |
| "losses_ref": -0.05724947527050972, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 715, | |
| "u": -4.411328315734863, | |
| "weight": 0.047762464731931686 | |
| }, | |
| { | |
| "diff_generated": -33.14401626586914, | |
| "epoch": 1.5074587804239727, | |
| "grad_norm": 8.842328295664547, | |
| "learning_rate": 3.45139266054715e-07, | |
| "logits/chosen": -2.5109152793884277, | |
| "logits/rejected": -2.010921001434326, | |
| "logps/chosen": -247.7344207763672, | |
| "logps/rejected": -3127.861328125, | |
| "logps_avg/chosen": -0.6326244473457336, | |
| "logps_avg/rejected": -9.943206787109375, | |
| "loss": 0.5529, | |
| "losses_ref": -0.05398111790418625, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 720, | |
| "u": -4.407879829406738, | |
| "weight": 0.051137275993824005 | |
| }, | |
| { | |
| "diff_generated": -34.548439025878906, | |
| "epoch": 1.5179272441769172, | |
| "grad_norm": 9.975694420372704, | |
| "learning_rate": 3.314137824101111e-07, | |
| "logits/chosen": -2.5249905586242676, | |
| "logits/rejected": -2.0087645053863525, | |
| "logps/chosen": -254.705322265625, | |
| "logps/rejected": -3178.156494140625, | |
| "logps_avg/chosen": -0.6393792033195496, | |
| "logps_avg/rejected": -10.364530563354492, | |
| "loss": 0.5512, | |
| "losses_ref": -0.05713530257344246, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 725, | |
| "u": -4.388852119445801, | |
| "weight": 0.061693333089351654 | |
| }, | |
| { | |
| "diff_generated": -32.73970413208008, | |
| "epoch": 1.5283957079298613, | |
| "grad_norm": 11.767533184902167, | |
| "learning_rate": 3.179123825692178e-07, | |
| "logits/chosen": -2.47417950630188, | |
| "logits/rejected": -2.016237497329712, | |
| "logps/chosen": -209.87802124023438, | |
| "logps/rejected": -2884.9580078125, | |
| "logps_avg/chosen": -0.5899583101272583, | |
| "logps_avg/rejected": -9.821910858154297, | |
| "loss": 0.5576, | |
| "losses_ref": -0.05416392162442207, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 730, | |
| "u": -4.406733989715576, | |
| "weight": 0.052076805382966995 | |
| }, | |
| { | |
| "diff_generated": -32.37422561645508, | |
| "epoch": 1.5388641716828055, | |
| "grad_norm": 9.47936945913295, | |
| "learning_rate": 3.0463959167023335e-07, | |
| "logits/chosen": -2.5015838146209717, | |
| "logits/rejected": -2.069798231124878, | |
| "logps/chosen": -217.7288055419922, | |
| "logps/rejected": -2870.407958984375, | |
| "logps_avg/chosen": -0.6165660619735718, | |
| "logps_avg/rejected": -9.712267875671387, | |
| "loss": 0.5285, | |
| "losses_ref": -0.08272585272789001, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 735, | |
| "u": -4.363903999328613, | |
| "weight": 0.07859805971384048 | |
| }, | |
| { | |
| "diff_generated": -32.28863525390625, | |
| "epoch": 1.54933263543575, | |
| "grad_norm": 9.124308513157976, | |
| "learning_rate": 2.915998582306299e-07, | |
| "logits/chosen": -2.5220367908477783, | |
| "logits/rejected": -2.038191318511963, | |
| "logps/chosen": -229.7245330810547, | |
| "logps/rejected": -2982.073486328125, | |
| "logps_avg/chosen": -0.617731511592865, | |
| "logps_avg/rejected": -9.686590194702148, | |
| "loss": 0.5329, | |
| "losses_ref": -0.05901874229311943, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 740, | |
| "u": -4.457033634185791, | |
| "weight": 0.051646940410137177 | |
| }, | |
| { | |
| "diff_generated": -31.57918357849121, | |
| "epoch": 1.559801099188694, | |
| "grad_norm": 8.788334428443942, | |
| "learning_rate": 2.7879755265618557e-07, | |
| "logits/chosen": -2.385359287261963, | |
| "logits/rejected": -2.0353574752807617, | |
| "logps/chosen": -191.27542114257812, | |
| "logps/rejected": -2743.20849609375, | |
| "logps_avg/chosen": -0.5724462270736694, | |
| "logps_avg/rejected": -9.473755836486816, | |
| "loss": 0.5301, | |
| "losses_ref": -0.06048304960131645, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 745, | |
| "u": -4.434350967407227, | |
| "weight": 0.051485490053892136 | |
| }, | |
| { | |
| "diff_generated": -33.260643005371094, | |
| "epoch": 1.5702695629416383, | |
| "grad_norm": 13.597985798817346, | |
| "learning_rate": 2.6623696577619625e-07, | |
| "logits/chosen": -2.498661518096924, | |
| "logits/rejected": -2.070701837539673, | |
| "logps/chosen": -227.7393035888672, | |
| "logps/rejected": -2963.530517578125, | |
| "logps_avg/chosen": -0.6551213264465332, | |
| "logps_avg/rejected": -9.978193283081055, | |
| "loss": 0.5837, | |
| "losses_ref": -0.03624705597758293, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 750, | |
| "u": -4.466372966766357, | |
| "weight": 0.028057094663381577 | |
| }, | |
| { | |
| "diff_generated": -29.464405059814453, | |
| "epoch": 1.5807380266945825, | |
| "grad_norm": 9.250307778356563, | |
| "learning_rate": 2.5392230740535846e-07, | |
| "logits/chosen": -2.5032472610473633, | |
| "logits/rejected": -2.06776762008667, | |
| "logps/chosen": -251.3708953857422, | |
| "logps/rejected": -2650.0810546875, | |
| "logps_avg/chosen": -0.6423950791358948, | |
| "logps_avg/rejected": -8.839322090148926, | |
| "loss": 0.5765, | |
| "losses_ref": -0.052409954369068146, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 755, | |
| "u": -4.408892631530762, | |
| "weight": 0.05609407275915146 | |
| }, | |
| { | |
| "diff_generated": -29.876062393188477, | |
| "epoch": 1.5912064904475267, | |
| "grad_norm": 12.686799097235559, | |
| "learning_rate": 2.418577049328058e-07, | |
| "logits/chosen": -2.5676896572113037, | |
| "logits/rejected": -2.1377835273742676, | |
| "logps/chosen": -265.7136535644531, | |
| "logps/rejected": -2646.18896484375, | |
| "logps_avg/chosen": -0.665650486946106, | |
| "logps_avg/rejected": -8.962818145751953, | |
| "loss": 0.5887, | |
| "losses_ref": -0.06443095207214355, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 760, | |
| "u": -4.407828330993652, | |
| "weight": 0.06109876185655594 | |
| }, | |
| { | |
| "diff_generated": -33.68701171875, | |
| "epoch": 1.6016749542004711, | |
| "grad_norm": 10.274482248605684, | |
| "learning_rate": 2.300472019387697e-07, | |
| "logits/chosen": -2.469991683959961, | |
| "logits/rejected": -2.029064893722534, | |
| "logps/chosen": -220.9040985107422, | |
| "logps/rejected": -3017.740234375, | |
| "logps_avg/chosen": -0.6078630685806274, | |
| "logps_avg/rejected": -10.10610294342041, | |
| "loss": 0.5524, | |
| "losses_ref": -0.04078926518559456, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 765, | |
| "u": -4.417876243591309, | |
| "weight": 0.03704729676246643 | |
| }, | |
| { | |
| "diff_generated": -31.825037002563477, | |
| "epoch": 1.6121434179534153, | |
| "grad_norm": 11.839464542057028, | |
| "learning_rate": 2.1849475683932994e-07, | |
| "logits/chosen": -2.4939956665039062, | |
| "logits/rejected": -2.1075644493103027, | |
| "logps/chosen": -223.6890869140625, | |
| "logps/rejected": -2828.83447265625, | |
| "logps_avg/chosen": -0.6260048747062683, | |
| "logps_avg/rejected": -9.547511100769043, | |
| "loss": 0.5492, | |
| "losses_ref": -0.05019731447100639, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 770, | |
| "u": -4.3696393966674805, | |
| "weight": 0.05455632880330086 | |
| }, | |
| { | |
| "diff_generated": -30.594751358032227, | |
| "epoch": 1.6226118817063595, | |
| "grad_norm": 9.146985127674856, | |
| "learning_rate": 2.0720424155971038e-07, | |
| "logits/chosen": -2.4665775299072266, | |
| "logits/rejected": -2.0385656356811523, | |
| "logps/chosen": -238.6437530517578, | |
| "logps/rejected": -2788.4453125, | |
| "logps_avg/chosen": -0.6432589292526245, | |
| "logps_avg/rejected": -9.178424835205078, | |
| "loss": 0.5603, | |
| "losses_ref": -0.060744620859622955, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 775, | |
| "u": -4.3677592277526855, | |
| "weight": 0.054513733834028244 | |
| }, | |
| { | |
| "diff_generated": -30.54671859741211, | |
| "epoch": 1.633080345459304, | |
| "grad_norm": 12.431506597181475, | |
| "learning_rate": 1.961794402365611e-07, | |
| "logits/chosen": -2.48872971534729, | |
| "logits/rejected": -2.045698404312134, | |
| "logps/chosen": -238.8667755126953, | |
| "logps/rejected": -2746.897705078125, | |
| "logps_avg/chosen": -0.6708707809448242, | |
| "logps_avg/rejected": -9.16401481628418, | |
| "loss": 0.5942, | |
| "losses_ref": -0.043663203716278076, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 780, | |
| "u": -4.429020881652832, | |
| "weight": 0.0392422154545784 | |
| }, | |
| { | |
| "diff_generated": -30.78244400024414, | |
| "epoch": 1.643548809212248, | |
| "grad_norm": 14.0111361325287, | |
| "learning_rate": 1.8542404794966427e-07, | |
| "logits/chosen": -2.5275959968566895, | |
| "logits/rejected": -2.0743932723999023, | |
| "logps/chosen": -236.8502655029297, | |
| "logps/rejected": -2726.872802734375, | |
| "logps_avg/chosen": -0.6049509644508362, | |
| "logps_avg/rejected": -9.234731674194336, | |
| "loss": 0.5559, | |
| "losses_ref": -0.040397271513938904, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 785, | |
| "u": -4.439243316650391, | |
| "weight": 0.034041326493024826 | |
| }, | |
| { | |
| "diff_generated": -30.46352767944336, | |
| "epoch": 1.6540172729651923, | |
| "grad_norm": 13.778205091571524, | |
| "learning_rate": 1.7494166948349053e-07, | |
| "logits/chosen": -2.4739108085632324, | |
| "logits/rejected": -2.0248847007751465, | |
| "logps/chosen": -188.06265258789062, | |
| "logps/rejected": -2811.63427734375, | |
| "logps_avg/chosen": -0.58104407787323, | |
| "logps_avg/rejected": -9.139059066772461, | |
| "loss": 0.5279, | |
| "losses_ref": -0.0705099031329155, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 790, | |
| "u": -4.394803047180176, | |
| "weight": 0.06850212812423706 | |
| }, | |
| { | |
| "diff_generated": -31.430471420288086, | |
| "epoch": 1.6644857367181367, | |
| "grad_norm": 7.6385064901749775, | |
| "learning_rate": 1.6473581811901528e-07, | |
| "logits/chosen": -2.465888500213623, | |
| "logits/rejected": -2.0527515411376953, | |
| "logps/chosen": -210.7668914794922, | |
| "logps/rejected": -2648.2431640625, | |
| "logps_avg/chosen": -0.6304226517677307, | |
| "logps_avg/rejected": -9.429141998291016, | |
| "loss": 0.5656, | |
| "losses_ref": -0.035576872527599335, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 795, | |
| "u": -4.473604679107666, | |
| "weight": 0.025509512051939964 | |
| }, | |
| { | |
| "diff_generated": -31.38290023803711, | |
| "epoch": 1.674954200471081, | |
| "grad_norm": 10.762504453960963, | |
| "learning_rate": 1.5480991445620538e-07, | |
| "logits/chosen": -2.458466053009033, | |
| "logits/rejected": -2.0299301147460938, | |
| "logps/chosen": -205.1313018798828, | |
| "logps/rejected": -2810.052001953125, | |
| "logps_avg/chosen": -0.5803036093711853, | |
| "logps_avg/rejected": -9.414871215820312, | |
| "loss": 0.5407, | |
| "losses_ref": -0.06857903301715851, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 800, | |
| "u": -4.383907318115234, | |
| "weight": 0.07058969140052795 | |
| }, | |
| { | |
| "diff_generated": -32.339012145996094, | |
| "epoch": 1.685422664224025, | |
| "grad_norm": 12.623391530366172, | |
| "learning_rate": 1.4516728526756873e-07, | |
| "logits/chosen": -2.4743473529815674, | |
| "logits/rejected": -2.0498290061950684, | |
| "logps/chosen": -213.2050018310547, | |
| "logps/rejected": -2888.50927734375, | |
| "logps_avg/chosen": -0.5934925079345703, | |
| "logps_avg/rejected": -9.701704025268555, | |
| "loss": 0.5501, | |
| "losses_ref": -0.061614394187927246, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 805, | |
| "u": -4.301063537597656, | |
| "weight": 0.06795644760131836 | |
| }, | |
| { | |
| "diff_generated": -29.015087127685547, | |
| "epoch": 1.6958911279769695, | |
| "grad_norm": 17.58977680719491, | |
| "learning_rate": 1.3581116238315194e-07, | |
| "logits/chosen": -2.4904446601867676, | |
| "logits/rejected": -2.050494909286499, | |
| "logps/chosen": -245.46176147460938, | |
| "logps/rejected": -2670.2060546875, | |
| "logps_avg/chosen": -0.6670945882797241, | |
| "logps_avg/rejected": -8.704526901245117, | |
| "loss": 0.5769, | |
| "losses_ref": -0.05934567004442215, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 810, | |
| "u": -4.374115943908691, | |
| "weight": 0.05290456861257553 | |
| }, | |
| { | |
| "diff_generated": -31.670734405517578, | |
| "epoch": 1.7063595917299135, | |
| "grad_norm": 20.41492239134003, | |
| "learning_rate": 1.2674468160735586e-07, | |
| "logits/chosen": -2.5279009342193604, | |
| "logits/rejected": -2.089564800262451, | |
| "logps/chosen": -219.30712890625, | |
| "logps/rejected": -2705.98193359375, | |
| "logps_avg/chosen": -0.6055987477302551, | |
| "logps_avg/rejected": -9.501219749450684, | |
| "loss": 0.5913, | |
| "losses_ref": -0.04426007717847824, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 815, | |
| "u": -4.457495212554932, | |
| "weight": 0.04348568618297577 | |
| }, | |
| { | |
| "diff_generated": -32.43749237060547, | |
| "epoch": 1.7168280554828579, | |
| "grad_norm": 8.725588658168348, | |
| "learning_rate": 1.1797088166794e-07, | |
| "logits/chosen": -2.479827880859375, | |
| "logits/rejected": -2.0322813987731934, | |
| "logps/chosen": -209.2858428955078, | |
| "logps/rejected": -2927.29150390625, | |
| "logps_avg/chosen": -0.5941019058227539, | |
| "logps_avg/rejected": -9.731245994567871, | |
| "loss": 0.5891, | |
| "losses_ref": -0.03500083088874817, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 820, | |
| "u": -4.411776065826416, | |
| "weight": 0.028576117008924484 | |
| }, | |
| { | |
| "diff_generated": -29.760284423828125, | |
| "epoch": 1.7272965192358023, | |
| "grad_norm": 7.224696592212977, | |
| "learning_rate": 1.0949270319755766e-07, | |
| "logits/chosen": -2.5083603858947754, | |
| "logits/rejected": -2.0863795280456543, | |
| "logps/chosen": -206.98812866210938, | |
| "logps/rejected": -2673.796875, | |
| "logps_avg/chosen": -0.5425812005996704, | |
| "logps_avg/rejected": -8.928085327148438, | |
| "loss": 0.5471, | |
| "losses_ref": -0.040049560368061066, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 825, | |
| "u": -4.422328472137451, | |
| "weight": 0.03419359400868416 | |
| }, | |
| { | |
| "diff_generated": -30.241125106811523, | |
| "epoch": 1.7377649829887463, | |
| "grad_norm": 11.359999539925766, | |
| "learning_rate": 1.013129877481741e-07, | |
| "logits/chosen": -2.4465301036834717, | |
| "logits/rejected": -2.0786962509155273, | |
| "logps/chosen": -251.66110229492188, | |
| "logps/rejected": -2615.54248046875, | |
| "logps_avg/chosen": -0.6354495286941528, | |
| "logps_avg/rejected": -9.07233715057373, | |
| "loss": 0.5595, | |
| "losses_ref": -0.038409143686294556, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 830, | |
| "u": -4.411218643188477, | |
| "weight": 0.03372519463300705 | |
| }, | |
| { | |
| "diff_generated": -31.309673309326172, | |
| "epoch": 1.7482334467416907, | |
| "grad_norm": 10.689212774701963, | |
| "learning_rate": 9.343447683868799e-08, | |
| "logits/chosen": -2.459969997406006, | |
| "logits/rejected": -2.0669496059417725, | |
| "logps/chosen": -197.42056274414062, | |
| "logps/rejected": -2780.952392578125, | |
| "logps_avg/chosen": -0.5673859715461731, | |
| "logps_avg/rejected": -9.392901420593262, | |
| "loss": 0.5517, | |
| "losses_ref": -0.03770770505070686, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 835, | |
| "u": -4.412764549255371, | |
| "weight": 0.03573904559016228 | |
| }, | |
| { | |
| "diff_generated": -30.0009765625, | |
| "epoch": 1.7587019104946349, | |
| "grad_norm": 13.800508017129163, | |
| "learning_rate": 8.585981103608342e-08, | |
| "logits/chosen": -2.48380184173584, | |
| "logits/rejected": -2.0376243591308594, | |
| "logps/chosen": -247.1182861328125, | |
| "logps/rejected": -2758.78857421875, | |
| "logps_avg/chosen": -0.6514982581138611, | |
| "logps_avg/rejected": -9.000292778015137, | |
| "loss": 0.5682, | |
| "losses_ref": -0.04732600972056389, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 840, | |
| "u": -4.44762659072876, | |
| "weight": 0.04196245223283768 | |
| }, | |
| { | |
| "diff_generated": -30.065624237060547, | |
| "epoch": 1.769170374247579, | |
| "grad_norm": 13.143185887862547, | |
| "learning_rate": 7.859152907041544e-08, | |
| "logits/chosen": -2.4641730785369873, | |
| "logits/rejected": -2.0567100048065186, | |
| "logps/chosen": -236.99148559570312, | |
| "logps/rejected": -2573.870849609375, | |
| "logps_avg/chosen": -0.6164765357971191, | |
| "logps_avg/rejected": -9.019688606262207, | |
| "loss": 0.5526, | |
| "losses_ref": -0.05898575857281685, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 845, | |
| "u": -4.398618698120117, | |
| "weight": 0.060839347541332245 | |
| }, | |
| { | |
| "diff_generated": -30.66835594177246, | |
| "epoch": 1.7796388380005235, | |
| "grad_norm": 17.88344708080126, | |
| "learning_rate": 7.163206698392742e-08, | |
| "logits/chosen": -2.4754815101623535, | |
| "logits/rejected": -2.077538251876831, | |
| "logps/chosen": -222.5938262939453, | |
| "logps/rejected": -2694.906494140625, | |
| "logps_avg/chosen": -0.6013268232345581, | |
| "logps_avg/rejected": -9.200507164001465, | |
| "loss": 0.5739, | |
| "losses_ref": -0.05739979073405266, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 850, | |
| "u": -4.392508506774902, | |
| "weight": 0.04965168982744217 | |
| }, | |
| { | |
| "diff_generated": -29.592029571533203, | |
| "epoch": 1.7901073017534677, | |
| "grad_norm": 13.06278922990348, | |
| "learning_rate": 6.498375731458527e-08, | |
| "logits/chosen": -2.514953136444092, | |
| "logits/rejected": -2.096156597137451, | |
| "logps/chosen": -233.39132690429688, | |
| "logps/rejected": -2654.203857421875, | |
| "logps_avg/chosen": -0.6016189455986023, | |
| "logps_avg/rejected": -8.877609252929688, | |
| "loss": 0.5566, | |
| "losses_ref": -0.04416666924953461, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 855, | |
| "u": -4.415837287902832, | |
| "weight": 0.03559427708387375 | |
| }, | |
| { | |
| "diff_generated": -32.24101638793945, | |
| "epoch": 1.8005757655064119, | |
| "grad_norm": 12.360220474861023, | |
| "learning_rate": 5.8648828314302735e-08, | |
| "logits/chosen": -2.4461560249328613, | |
| "logits/rejected": -2.015535354614258, | |
| "logps/chosen": -225.93533325195312, | |
| "logps/rejected": -2782.87255859375, | |
| "logps_avg/chosen": -0.5964374542236328, | |
| "logps_avg/rejected": -9.6723051071167, | |
| "loss": 0.5666, | |
| "losses_ref": -0.03670288249850273, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 860, | |
| "u": -4.398609161376953, | |
| "weight": 0.03543057292699814 | |
| }, | |
| { | |
| "diff_generated": -31.262531280517578, | |
| "epoch": 1.8110442292593563, | |
| "grad_norm": 16.164691771356388, | |
| "learning_rate": 5.2629403202119505e-08, | |
| "logits/chosen": -2.4537065029144287, | |
| "logits/rejected": -2.062150716781616, | |
| "logps/chosen": -204.52587890625, | |
| "logps/rejected": -2741.170654296875, | |
| "logps_avg/chosen": -0.5822928547859192, | |
| "logps_avg/rejected": -9.378759384155273, | |
| "loss": 0.5402, | |
| "losses_ref": -0.03764919191598892, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 865, | |
| "u": -4.42946720123291, | |
| "weight": 0.031336475163698196 | |
| }, | |
| { | |
| "diff_generated": -30.04671859741211, | |
| "epoch": 1.8215126930123005, | |
| "grad_norm": 10.013135246955365, | |
| "learning_rate": 4.692749945258057e-08, | |
| "logits/chosen": -2.4766173362731934, | |
| "logits/rejected": -2.0611166954040527, | |
| "logps/chosen": -236.82284545898438, | |
| "logps/rejected": -2744.845458984375, | |
| "logps_avg/chosen": -0.6182196736335754, | |
| "logps_avg/rejected": -9.014015197753906, | |
| "loss": 0.5905, | |
| "losses_ref": -0.07179991900920868, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 870, | |
| "u": -4.407280921936035, | |
| "weight": 0.07166210561990738 | |
| }, | |
| { | |
| "diff_generated": -32.90989303588867, | |
| "epoch": 1.8319811567652446, | |
| "grad_norm": 9.513246816083905, | |
| "learning_rate": 4.1545028119559066e-08, | |
| "logits/chosen": -2.4886152744293213, | |
| "logits/rejected": -2.066333770751953, | |
| "logps/chosen": -223.5939483642578, | |
| "logps/rejected": -2896.932373046875, | |
| "logps_avg/chosen": -0.6256131529808044, | |
| "logps_avg/rejected": -9.872968673706055, | |
| "loss": 0.5458, | |
| "losses_ref": -0.0590200200676918, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 875, | |
| "u": -4.408657073974609, | |
| "weight": 0.056226253509521484 | |
| }, | |
| { | |
| "diff_generated": -31.053930282592773, | |
| "epoch": 1.842449620518189, | |
| "grad_norm": 94.13052968470578, | |
| "learning_rate": 3.648379319574568e-08, | |
| "logits/chosen": -2.528390407562256, | |
| "logits/rejected": -2.073420524597168, | |
| "logps/chosen": -222.608642578125, | |
| "logps/rejected": -2745.4130859375, | |
| "logps_avg/chosen": -0.6137613654136658, | |
| "logps_avg/rejected": -9.316179275512695, | |
| "loss": 0.5237, | |
| "losses_ref": -0.06711964309215546, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 880, | |
| "u": -4.3957600593566895, | |
| "weight": 0.06412933766841888 | |
| }, | |
| { | |
| "diff_generated": -30.842365264892578, | |
| "epoch": 1.8529180842711332, | |
| "grad_norm": 8.312877021027528, | |
| "learning_rate": 3.17454910080216e-08, | |
| "logits/chosen": -2.5333809852600098, | |
| "logits/rejected": -2.1170499324798584, | |
| "logps/chosen": -253.5600128173828, | |
| "logps/rejected": -2778.802001953125, | |
| "logps_avg/chosen": -0.6801126599311829, | |
| "logps_avg/rejected": -9.25270938873291, | |
| "loss": 0.5709, | |
| "losses_ref": -0.0633564293384552, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 885, | |
| "u": -4.409802436828613, | |
| "weight": 0.05907650664448738 | |
| }, | |
| { | |
| "diff_generated": -30.68337631225586, | |
| "epoch": 1.8633865480240774, | |
| "grad_norm": 8.793473948703046, | |
| "learning_rate": 2.733170964891607e-08, | |
| "logits/chosen": -2.46742582321167, | |
| "logits/rejected": -2.0830397605895996, | |
| "logps/chosen": -204.62625122070312, | |
| "logps/rejected": -2726.16552734375, | |
| "logps_avg/chosen": -0.5727981328964233, | |
| "logps_avg/rejected": -9.205012321472168, | |
| "loss": 0.5596, | |
| "losses_ref": -0.05169714242219925, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 890, | |
| "u": -4.437578201293945, | |
| "weight": 0.043870192021131516 | |
| }, | |
| { | |
| "diff_generated": -30.3818302154541, | |
| "epoch": 1.8738550117770219, | |
| "grad_norm": 6.648166332075938, | |
| "learning_rate": 2.324392844434042e-08, | |
| "logits/chosen": -2.491211414337158, | |
| "logits/rejected": -2.0470757484436035, | |
| "logps/chosen": -229.8271026611328, | |
| "logps/rejected": -2785.03076171875, | |
| "logps_avg/chosen": -0.6076307892799377, | |
| "logps_avg/rejected": -9.11454963684082, | |
| "loss": 0.5638, | |
| "losses_ref": -0.032108329236507416, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 895, | |
| "u": -4.453085899353027, | |
| "weight": 0.02507254108786583 | |
| }, | |
| { | |
| "diff_generated": -31.61887550354004, | |
| "epoch": 1.8843234755299658, | |
| "grad_norm": 18.51567409544646, | |
| "learning_rate": 1.9483517457776434e-08, | |
| "logits/chosen": -2.4359021186828613, | |
| "logits/rejected": -2.096619129180908, | |
| "logps/chosen": -188.21896362304688, | |
| "logps/rejected": -2806.19921875, | |
| "logps_avg/chosen": -0.5758072733879089, | |
| "logps_avg/rejected": -9.485663414001465, | |
| "loss": 0.5343, | |
| "losses_ref": -0.08278501033782959, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 900, | |
| "u": -4.343811988830566, | |
| "weight": 0.08570453524589539 | |
| }, | |
| { | |
| "diff_generated": -31.200836181640625, | |
| "epoch": 1.8947919392829102, | |
| "grad_norm": 16.452754098885247, | |
| "learning_rate": 1.6051737031084533e-08, | |
| "logits/chosen": -2.453563690185547, | |
| "logits/rejected": -2.0280988216400146, | |
| "logps/chosen": -214.77395629882812, | |
| "logps/rejected": -2817.1669921875, | |
| "logps_avg/chosen": -0.5827924013137817, | |
| "logps_avg/rejected": -9.360250473022461, | |
| "loss": 0.5565, | |
| "losses_ref": -0.0487370602786541, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 905, | |
| "u": -4.410248756408691, | |
| "weight": 0.04455076903104782 | |
| }, | |
| { | |
| "diff_generated": -30.942846298217773, | |
| "epoch": 1.9052604030358546, | |
| "grad_norm": 20.55170462638644, | |
| "learning_rate": 1.2949737362087154e-08, | |
| "logits/chosen": -2.467200756072998, | |
| "logits/rejected": -2.096820831298828, | |
| "logps/chosen": -206.9503936767578, | |
| "logps/rejected": -2817.215087890625, | |
| "logps_avg/chosen": -0.6169668436050415, | |
| "logps_avg/rejected": -9.282854080200195, | |
| "loss": 0.5886, | |
| "losses_ref": -0.0511205717921257, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 910, | |
| "u": -4.404674053192139, | |
| "weight": 0.04643367975950241 | |
| }, | |
| { | |
| "diff_generated": -30.169301986694336, | |
| "epoch": 1.9157288667887986, | |
| "grad_norm": 8.053020444587133, | |
| "learning_rate": 1.0178558119067315e-08, | |
| "logits/chosen": -2.4181623458862305, | |
| "logits/rejected": -2.028630018234253, | |
| "logps/chosen": -212.6619873046875, | |
| "logps/rejected": -2651.956787109375, | |
| "logps_avg/chosen": -0.5928919315338135, | |
| "logps_avg/rejected": -9.050790786743164, | |
| "loss": 0.5551, | |
| "losses_ref": -0.05854606628417969, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 915, | |
| "u": -4.413174629211426, | |
| "weight": 0.0472232848405838 | |
| }, | |
| { | |
| "diff_generated": -32.18278121948242, | |
| "epoch": 1.926197330541743, | |
| "grad_norm": 24.431507328322112, | |
| "learning_rate": 7.739128092312918e-09, | |
| "logits/chosen": -2.4973015785217285, | |
| "logits/rejected": -2.0860588550567627, | |
| "logps/chosen": -216.73666381835938, | |
| "logps/rejected": -2769.303955078125, | |
| "logps_avg/chosen": -0.6046438813209534, | |
| "logps_avg/rejected": -9.654834747314453, | |
| "loss": 0.5467, | |
| "losses_ref": -0.06063861399888992, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 920, | |
| "u": -4.384333610534668, | |
| "weight": 0.07002799212932587 | |
| }, | |
| { | |
| "diff_generated": -33.56671142578125, | |
| "epoch": 1.9366657942946872, | |
| "grad_norm": 12.304529486565588, | |
| "learning_rate": 5.632264882822757e-09, | |
| "logits/chosen": -2.499455451965332, | |
| "logits/rejected": -2.059584140777588, | |
| "logps/chosen": -228.59640502929688, | |
| "logps/rejected": -2900.51123046875, | |
| "logps_avg/chosen": -0.6097213625907898, | |
| "logps_avg/rejected": -10.070013046264648, | |
| "loss": 0.5799, | |
| "losses_ref": -0.0342455692589283, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 925, | |
| "u": -4.438467979431152, | |
| "weight": 0.030456313863396645 | |
| }, | |
| { | |
| "diff_generated": -30.743816375732422, | |
| "epoch": 1.9471342580476314, | |
| "grad_norm": 11.65318893393544, | |
| "learning_rate": 3.858674628278824e-09, | |
| "logits/chosen": -2.4831936359405518, | |
| "logits/rejected": -2.0906691551208496, | |
| "logps/chosen": -230.875, | |
| "logps/rejected": -2670.49755859375, | |
| "logps_avg/chosen": -0.603253960609436, | |
| "logps_avg/rejected": -9.223145484924316, | |
| "loss": 0.5642, | |
| "losses_ref": -0.05138419196009636, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 930, | |
| "u": -4.435812473297119, | |
| "weight": 0.0458533950150013 | |
| }, | |
| { | |
| "diff_generated": -31.753076553344727, | |
| "epoch": 1.9576027218005758, | |
| "grad_norm": 16.58166205034555, | |
| "learning_rate": 2.418951766376742e-09, | |
| "logits/chosen": -2.4695091247558594, | |
| "logits/rejected": -2.0497422218322754, | |
| "logps/chosen": -205.1109619140625, | |
| "logps/rejected": -2825.771484375, | |
| "logps_avg/chosen": -0.5685989260673523, | |
| "logps_avg/rejected": -9.525922775268555, | |
| "loss": 0.554, | |
| "losses_ref": -0.05179325491189957, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 935, | |
| "u": -4.395134925842285, | |
| "weight": 0.04630660265684128 | |
| }, | |
| { | |
| "diff_generated": -31.87947654724121, | |
| "epoch": 1.96807118555352, | |
| "grad_norm": 26.35143781539668, | |
| "learning_rate": 1.313578835593465e-09, | |
| "logits/chosen": -2.4483304023742676, | |
| "logits/rejected": -2.004983425140381, | |
| "logps/chosen": -241.7947998046875, | |
| "logps/rejected": -2828.03173828125, | |
| "logps_avg/chosen": -0.6296852827072144, | |
| "logps_avg/rejected": -9.5638427734375, | |
| "loss": 0.5603, | |
| "losses_ref": -0.03613152354955673, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 940, | |
| "u": -4.452606678009033, | |
| "weight": 0.026990771293640137 | |
| }, | |
| { | |
| "diff_generated": -29.87912940979004, | |
| "epoch": 1.9785396493064642, | |
| "grad_norm": 15.814334066391242, | |
| "learning_rate": 5.429263134594242e-10, | |
| "logits/chosen": -2.4958741664886475, | |
| "logits/rejected": -2.101313591003418, | |
| "logps/chosen": -207.99179077148438, | |
| "logps/rejected": -2708.303466796875, | |
| "logps_avg/chosen": -0.5728383660316467, | |
| "logps_avg/rejected": -8.963739395141602, | |
| "loss": 0.5538, | |
| "losses_ref": -0.06295718252658844, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 945, | |
| "u": -4.400644302368164, | |
| "weight": 0.05401432514190674 | |
| }, | |
| { | |
| "diff_generated": -31.941226959228516, | |
| "epoch": 1.9890081130594086, | |
| "grad_norm": 8.580957108117007, | |
| "learning_rate": 1.0725249238940915e-10, | |
| "logits/chosen": -2.4698963165283203, | |
| "logits/rejected": -2.0529587268829346, | |
| "logps/chosen": -231.325927734375, | |
| "logps/rejected": -2804.859619140625, | |
| "logps_avg/chosen": -0.6270388960838318, | |
| "logps_avg/rejected": -9.582367897033691, | |
| "loss": 0.5563, | |
| "losses_ref": -0.029423978179693222, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 950, | |
| "u": -4.438694477081299, | |
| "weight": 0.025990551337599754 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 954, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |