| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 3354, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.005963918294319368, |
| "grad_norm": 19.664148330688477, |
| "learning_rate": 4.5e-06, |
| "logits/chosen": -61.68421173095703, |
| "logits/rejected": -59.81378936767578, |
| "logps/chosen": -3847.377685546875, |
| "logps/rejected": -3732.838623046875, |
| "loss": 0.6957, |
| "rewards/accuracies": 0.4749999940395355, |
| "rewards/chosen": 0.009157716296613216, |
| "rewards/margins": 0.04523131996393204, |
| "rewards/rejected": -0.036073606461286545, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.011927836588638736, |
| "grad_norm": 27.623851776123047, |
| "learning_rate": 9.5e-06, |
| "logits/chosen": -62.0186767578125, |
| "logits/rejected": -62.229949951171875, |
| "logps/chosen": -3975.751953125, |
| "logps/rejected": -3489.30615234375, |
| "loss": 0.6545, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": 0.3238146901130676, |
| "rewards/margins": 0.1601802110671997, |
| "rewards/rejected": 0.1636344939470291, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.017891754882958102, |
| "grad_norm": 27.4714412689209, |
| "learning_rate": 1.45e-05, |
| "logits/chosen": -61.2970085144043, |
| "logits/rejected": -60.396812438964844, |
| "logps/chosen": -3610.776123046875, |
| "logps/rejected": -3742.642578125, |
| "loss": 0.582, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": 1.0766934156417847, |
| "rewards/margins": 0.32587307691574097, |
| "rewards/rejected": 0.7508202791213989, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.02385567317727747, |
| "grad_norm": 11.324883460998535, |
| "learning_rate": 1.9500000000000003e-05, |
| "logits/chosen": -57.98284149169922, |
| "logits/rejected": -58.55870819091797, |
| "logps/chosen": -3499.75732421875, |
| "logps/rejected": -3155.265625, |
| "loss": 0.465, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": 2.3674192428588867, |
| "rewards/margins": 0.7249017357826233, |
| "rewards/rejected": 1.6425174474716187, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.029819591471596837, |
| "grad_norm": 20.27655601501465, |
| "learning_rate": 2.45e-05, |
| "logits/chosen": -58.64543914794922, |
| "logits/rejected": -56.548179626464844, |
| "logps/chosen": -3525.67431640625, |
| "logps/rejected": -3034.49267578125, |
| "loss": 0.5633, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": 6.382194519042969, |
| "rewards/margins": 2.046049118041992, |
| "rewards/rejected": 4.336145877838135, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.035783509765916204, |
| "grad_norm": 65.18791961669922, |
| "learning_rate": 2.95e-05, |
| "logits/chosen": -57.381561279296875, |
| "logits/rejected": -56.21010208129883, |
| "logps/chosen": -3934.571533203125, |
| "logps/rejected": -3834.8515625, |
| "loss": 0.2975, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": 10.051956176757812, |
| "rewards/margins": 2.9109206199645996, |
| "rewards/rejected": 7.141035556793213, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.04174742806023558, |
| "grad_norm": 3.803753614425659, |
| "learning_rate": 3.45e-05, |
| "logits/chosen": -58.149757385253906, |
| "logits/rejected": -57.435768127441406, |
| "logps/chosen": -3799.5859375, |
| "logps/rejected": -3451.617919921875, |
| "loss": 0.6205, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": 9.363242149353027, |
| "rewards/margins": 3.4164345264434814, |
| "rewards/rejected": 5.946808815002441, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.04771134635455494, |
| "grad_norm": 27.553604125976562, |
| "learning_rate": 3.9500000000000005e-05, |
| "logits/chosen": -59.83305740356445, |
| "logits/rejected": -59.072486877441406, |
| "logps/chosen": -3933.164794921875, |
| "logps/rejected": -3912.46240234375, |
| "loss": 0.5561, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": 8.799053192138672, |
| "rewards/margins": 4.589492321014404, |
| "rewards/rejected": 4.209560871124268, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.05367526464887431, |
| "grad_norm": 4.916882514953613, |
| "learning_rate": 4.4500000000000004e-05, |
| "logits/chosen": -58.64013671875, |
| "logits/rejected": -58.137168884277344, |
| "logps/chosen": -3592.069580078125, |
| "logps/rejected": -3692.88427734375, |
| "loss": 0.2698, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 6.715191841125488, |
| "rewards/margins": 4.236520290374756, |
| "rewards/rejected": 2.4786713123321533, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.059639182943193675, |
| "grad_norm": 39.796302795410156, |
| "learning_rate": 4.9500000000000004e-05, |
| "logits/chosen": -58.97795867919922, |
| "logits/rejected": -60.2847785949707, |
| "logps/chosen": -3994.854248046875, |
| "logps/rejected": -4313.3271484375, |
| "loss": 0.2365, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": 8.663000106811523, |
| "rewards/margins": 4.546173095703125, |
| "rewards/rejected": 4.11682653427124, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.06560310123751305, |
| "grad_norm": 3.1577208042144775, |
| "learning_rate": 4.9999056250036984e-05, |
| "logits/chosen": -58.68115234375, |
| "logits/rejected": -58.9907341003418, |
| "logps/chosen": -3749.045654296875, |
| "logps/rejected": -3762.167236328125, |
| "loss": 0.0366, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 11.467626571655273, |
| "rewards/margins": 5.942251682281494, |
| "rewards/rejected": 5.525373935699463, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.07156701953183241, |
| "grad_norm": 3.2147419452667236, |
| "learning_rate": 4.999579399596396e-05, |
| "logits/chosen": -56.03290939331055, |
| "logits/rejected": -56.16686248779297, |
| "logps/chosen": -3634.969482421875, |
| "logps/rejected": -3742.016845703125, |
| "loss": 0.2662, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 18.00886344909668, |
| "rewards/margins": 8.250906944274902, |
| "rewards/rejected": 9.757956504821777, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.07753093782615178, |
| "grad_norm": 0.006837156601250172, |
| "learning_rate": 4.9990201890548246e-05, |
| "logits/chosen": -53.0700569152832, |
| "logits/rejected": -53.876380920410156, |
| "logps/chosen": -4084.875732421875, |
| "logps/rejected": -4197.57958984375, |
| "loss": 0.0278, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 18.550527572631836, |
| "rewards/margins": 10.616544723510742, |
| "rewards/rejected": 7.933982849121094, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.08349485612047115, |
| "grad_norm": 0.585788905620575, |
| "learning_rate": 4.998228045502851e-05, |
| "logits/chosen": -55.47416305541992, |
| "logits/rejected": -55.35027313232422, |
| "logps/chosen": -3845.14453125, |
| "logps/rejected": -3608.837158203125, |
| "loss": 0.0453, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 15.1895112991333, |
| "rewards/margins": 8.870885848999023, |
| "rewards/rejected": 6.318624973297119, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.08945877441479051, |
| "grad_norm": 0.07510236650705338, |
| "learning_rate": 4.9972030427759666e-05, |
| "logits/chosen": -50.84516906738281, |
| "logits/rejected": -50.26861572265625, |
| "logps/chosen": -3661.830078125, |
| "logps/rejected": -3803.436767578125, |
| "loss": 0.2181, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 15.063519477844238, |
| "rewards/margins": 11.220986366271973, |
| "rewards/rejected": 3.842533826828003, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.09542269270910989, |
| "grad_norm": 0.06167513504624367, |
| "learning_rate": 4.995945276414404e-05, |
| "logits/chosen": -52.945655822753906, |
| "logits/rejected": -53.59800338745117, |
| "logps/chosen": -3374.52197265625, |
| "logps/rejected": -3389.505126953125, |
| "loss": 0.0903, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 8.205995559692383, |
| "rewards/margins": 9.442131996154785, |
| "rewards/rejected": -1.2361366748809814, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.10138661100342926, |
| "grad_norm": 4.309830188751221, |
| "learning_rate": 4.994454863654233e-05, |
| "logits/chosen": -58.374549865722656, |
| "logits/rejected": -57.894004821777344, |
| "logps/chosen": -4359.32177734375, |
| "logps/rejected": -4773.193359375, |
| "loss": 0.0278, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.901937961578369, |
| "rewards/margins": 13.444661140441895, |
| "rewards/rejected": -6.542723178863525, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.10735052929774862, |
| "grad_norm": 0.1345188468694687, |
| "learning_rate": 4.992731943416432e-05, |
| "logits/chosen": -56.97514724731445, |
| "logits/rejected": -57.7388801574707, |
| "logps/chosen": -3933.59716796875, |
| "logps/rejected": -4406.06103515625, |
| "loss": 0.093, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 7.6058669090271, |
| "rewards/margins": 12.426251411437988, |
| "rewards/rejected": -4.8203840255737305, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.11331444759206799, |
| "grad_norm": 0.4823387563228607, |
| "learning_rate": 4.990776676293941e-05, |
| "logits/chosen": -60.002403259277344, |
| "logits/rejected": -61.1885871887207, |
| "logps/chosen": -4209.791015625, |
| "logps/rejected": -4174.68212890625, |
| "loss": 0.1487, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 3.5269577503204346, |
| "rewards/margins": 8.545251846313477, |
| "rewards/rejected": -5.018294811248779, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.11927836588638735, |
| "grad_norm": 1.821547031402588, |
| "learning_rate": 4.98858924453669e-05, |
| "logits/chosen": -60.311546325683594, |
| "logits/rejected": -61.352203369140625, |
| "logps/chosen": -3726.235595703125, |
| "logps/rejected": -3612.451904296875, |
| "loss": 0.0083, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.0089493989944458, |
| "rewards/margins": 10.570683479309082, |
| "rewards/rejected": -9.561734199523926, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.12524228418070674, |
| "grad_norm": 0.010515117086470127, |
| "learning_rate": 4.9861698520346155e-05, |
| "logits/chosen": -58.907005310058594, |
| "logits/rejected": -58.76160430908203, |
| "logps/chosen": -3922.297607421875, |
| "logps/rejected": -3831.158935546875, |
| "loss": 0.0106, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.980830669403076, |
| "rewards/margins": 11.232372283935547, |
| "rewards/rejected": -5.251542091369629, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.1312062024750261, |
| "grad_norm": 0.13617613911628723, |
| "learning_rate": 4.983518724298652e-05, |
| "logits/chosen": -57.034385681152344, |
| "logits/rejected": -61.353546142578125, |
| "logps/chosen": -3717.315185546875, |
| "logps/rejected": -4125.3466796875, |
| "loss": 0.0403, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 11.408642768859863, |
| "rewards/margins": 12.029062271118164, |
| "rewards/rejected": -0.6204200983047485, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.13717012076934545, |
| "grad_norm": 0.11460208892822266, |
| "learning_rate": 4.980636108439712e-05, |
| "logits/chosen": -60.95244216918945, |
| "logits/rejected": -62.050682067871094, |
| "logps/chosen": -3819.20654296875, |
| "logps/rejected": -3770.43701171875, |
| "loss": 0.2529, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": -2.9785029888153076, |
| "rewards/margins": 11.722539901733398, |
| "rewards/rejected": -14.701044082641602, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.14313403906366481, |
| "grad_norm": 0.010561717674136162, |
| "learning_rate": 4.97752227314566e-05, |
| "logits/chosen": -61.96656036376953, |
| "logits/rejected": -63.59345245361328, |
| "logps/chosen": -3559.760986328125, |
| "logps/rejected": -3238.27099609375, |
| "loss": 0.0605, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": -3.5705113410949707, |
| "rewards/margins": 10.212821960449219, |
| "rewards/rejected": -13.783332824707031, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.1490979573579842, |
| "grad_norm": 0.020034722983837128, |
| "learning_rate": 4.9741775086562576e-05, |
| "logits/chosen": -58.21870040893555, |
| "logits/rejected": -59.43880081176758, |
| "logps/chosen": -3953.829345703125, |
| "logps/rejected": -4362.8837890625, |
| "loss": 0.0013, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.396712779998779, |
| "rewards/margins": 12.898712158203125, |
| "rewards/rejected": -7.5019989013671875, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.15506187565230356, |
| "grad_norm": 0.0008314030710607767, |
| "learning_rate": 4.970602126736118e-05, |
| "logits/chosen": -58.50162887573242, |
| "logits/rejected": -59.48447799682617, |
| "logps/chosen": -3606.218017578125, |
| "logps/rejected": -3528.481689453125, |
| "loss": 0.0016, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.148120880126953, |
| "rewards/margins": 12.437234878540039, |
| "rewards/rejected": -3.2891151905059814, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.16102579394662292, |
| "grad_norm": 1.2535442113876343, |
| "learning_rate": 4.966796460645644e-05, |
| "logits/chosen": -59.570526123046875, |
| "logits/rejected": -63.26567459106445, |
| "logps/chosen": -3971.397705078125, |
| "logps/rejected": -4289.9853515625, |
| "loss": 0.0029, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 4.893758296966553, |
| "rewards/margins": 15.240409851074219, |
| "rewards/rejected": -10.346652030944824, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.1669897122409423, |
| "grad_norm": 0.0032445124816149473, |
| "learning_rate": 4.962760865109964e-05, |
| "logits/chosen": -56.13077926635742, |
| "logits/rejected": -57.030982971191406, |
| "logps/chosen": -3904.32177734375, |
| "logps/rejected": -3896.450439453125, |
| "loss": 0.0019, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.48493766784668, |
| "rewards/margins": 15.189654350280762, |
| "rewards/rejected": -4.704716205596924, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.17295363053526167, |
| "grad_norm": 0.0017609696369618177, |
| "learning_rate": 4.95849571628587e-05, |
| "logits/chosen": -57.822837829589844, |
| "logits/rejected": -56.972389221191406, |
| "logps/chosen": -3716.139892578125, |
| "logps/rejected": -3481.774658203125, |
| "loss": 0.0103, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 14.023122787475586, |
| "rewards/margins": 13.122570991516113, |
| "rewards/rejected": 0.9005520939826965, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.17891754882958102, |
| "grad_norm": 1.900453805923462, |
| "learning_rate": 4.954001411726755e-05, |
| "logits/chosen": -57.431907653808594, |
| "logits/rejected": -60.528594970703125, |
| "logps/chosen": -4033.428955078125, |
| "logps/rejected": -4488.1083984375, |
| "loss": 0.0132, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.56352710723877, |
| "rewards/margins": 13.36528491973877, |
| "rewards/rejected": -3.8017585277557373, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.1848814671239004, |
| "grad_norm": 0.022368023172020912, |
| "learning_rate": 4.949278370345558e-05, |
| "logits/chosen": -57.648948669433594, |
| "logits/rejected": -61.04389190673828, |
| "logps/chosen": -3662.002685546875, |
| "logps/rejected": -3926.045654296875, |
| "loss": 0.2272, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 6.355870246887207, |
| "rewards/margins": 15.903188705444336, |
| "rewards/rejected": -9.547318458557129, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.19084538541821977, |
| "grad_norm": 0.0004944842658005655, |
| "learning_rate": 4.944327032375716e-05, |
| "logits/chosen": -59.660667419433594, |
| "logits/rejected": -62.95856857299805, |
| "logps/chosen": -3589.598388671875, |
| "logps/rejected": -3629.106201171875, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.476762294769287, |
| "rewards/margins": 13.896219253540039, |
| "rewards/rejected": -7.419455528259277, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.19680930371253913, |
| "grad_norm": 0.00029502142569981515, |
| "learning_rate": 4.93914785933013e-05, |
| "logits/chosen": -55.67151641845703, |
| "logits/rejected": -57.00590896606445, |
| "logps/chosen": -3930.76025390625, |
| "logps/rejected": -3948.296142578125, |
| "loss": 0.0028, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 11.375188827514648, |
| "rewards/margins": 14.264836311340332, |
| "rewards/rejected": -2.889647960662842, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.20277322200685852, |
| "grad_norm": 0.003186926245689392, |
| "learning_rate": 4.9337413339581494e-05, |
| "logits/chosen": -55.91204833984375, |
| "logits/rejected": -55.10901641845703, |
| "logps/chosen": -3886.98583984375, |
| "logps/rejected": -3999.08251953125, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 14.179794311523438, |
| "rewards/margins": 14.276512145996094, |
| "rewards/rejected": -0.09671908617019653, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.20873714030117788, |
| "grad_norm": 0.0017121023265644908, |
| "learning_rate": 4.928107960200573e-05, |
| "logits/chosen": -52.622962951660156, |
| "logits/rejected": -53.773536682128906, |
| "logps/chosen": -3416.02392578125, |
| "logps/rejected": -3253.180419921875, |
| "loss": 0.0008, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 18.344406127929688, |
| "rewards/margins": 12.657001495361328, |
| "rewards/rejected": 5.687404632568359, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.21470105859549724, |
| "grad_norm": 0.005526215769350529, |
| "learning_rate": 4.922248263142678e-05, |
| "logits/chosen": -56.997161865234375, |
| "logits/rejected": -56.7293701171875, |
| "logps/chosen": -3787.90234375, |
| "logps/rejected": -3675.44677734375, |
| "loss": 0.0075, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 20.720577239990234, |
| "rewards/margins": 16.067211151123047, |
| "rewards/rejected": 4.653366565704346, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.22066497688981662, |
| "grad_norm": 0.00579728651791811, |
| "learning_rate": 4.916162788965275e-05, |
| "logits/chosen": -55.556427001953125, |
| "logits/rejected": -57.451927185058594, |
| "logps/chosen": -3876.120361328125, |
| "logps/rejected": -4547.07275390625, |
| "loss": 0.058, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 19.52408218383789, |
| "rewards/margins": 14.548626899719238, |
| "rewards/rejected": 4.975453853607178, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.22662889518413598, |
| "grad_norm": 0.002147270832210779, |
| "learning_rate": 4.909852104893803e-05, |
| "logits/chosen": -55.28306198120117, |
| "logits/rejected": -57.85760498046875, |
| "logps/chosen": -3812.12646484375, |
| "logps/rejected": -3860.81787109375, |
| "loss": 0.0032, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 14.829846382141113, |
| "rewards/margins": 15.175271987915039, |
| "rewards/rejected": -0.34542423486709595, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.23259281347845534, |
| "grad_norm": 2.6284790237696143e-06, |
| "learning_rate": 4.903316799145453e-05, |
| "logits/chosen": -56.578338623046875, |
| "logits/rejected": -61.98085403442383, |
| "logps/chosen": -3731.21484375, |
| "logps/rejected": -3811.68017578125, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 15.379071235656738, |
| "rewards/margins": 15.874186515808105, |
| "rewards/rejected": -0.4951143264770508, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.2385567317727747, |
| "grad_norm": 0.0006998952012509108, |
| "learning_rate": 4.896557480874345e-05, |
| "logits/chosen": -55.025390625, |
| "logits/rejected": -56.02961349487305, |
| "logps/chosen": -4079.14990234375, |
| "logps/rejected": -3936.82666015625, |
| "loss": 0.0222, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 16.036401748657227, |
| "rewards/margins": 15.827476501464844, |
| "rewards/rejected": 0.20892485976219177, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.2445206500670941, |
| "grad_norm": 0.0015518699074164033, |
| "learning_rate": 4.889574780114745e-05, |
| "logits/chosen": -57.45948028564453, |
| "logits/rejected": -58.848793029785156, |
| "logps/chosen": -3700.404296875, |
| "logps/rejected": -3442.11572265625, |
| "loss": 0.0643, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 15.649378776550293, |
| "rewards/margins": 14.64953899383545, |
| "rewards/rejected": 0.9998385310173035, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.2504845683614135, |
| "grad_norm": 1.4911309480667114, |
| "learning_rate": 4.8823693477223444e-05, |
| "logits/chosen": -58.846778869628906, |
| "logits/rejected": -59.60619354248047, |
| "logps/chosen": -3987.731201171875, |
| "logps/rejected": -4248.55078125, |
| "loss": 0.0723, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 15.464131355285645, |
| "rewards/margins": 15.123265266418457, |
| "rewards/rejected": 0.34086543321609497, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.25644848665573283, |
| "grad_norm": 0.001055889530107379, |
| "learning_rate": 4.874941855313587e-05, |
| "logits/chosen": -56.497413635253906, |
| "logits/rejected": -57.141944885253906, |
| "logps/chosen": -3793.345703125, |
| "logps/rejected": -3607.841796875, |
| "loss": 0.129, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 7.237372398376465, |
| "rewards/margins": 13.780862808227539, |
| "rewards/rejected": -6.543488502502441, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.2624124049500522, |
| "grad_norm": 8.529239181598314e-08, |
| "learning_rate": 4.8672929952030764e-05, |
| "logits/chosen": -59.98900604248047, |
| "logits/rejected": -62.46282958984375, |
| "logps/chosen": -4084.49755859375, |
| "logps/rejected": -4386.53271484375, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 4.959494113922119, |
| "rewards/margins": 17.883264541625977, |
| "rewards/rejected": -12.923771858215332, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.26837632324437155, |
| "grad_norm": 0.012090430594980717, |
| "learning_rate": 4.8594234803390384e-05, |
| "logits/chosen": -58.39624786376953, |
| "logits/rejected": -60.7776985168457, |
| "logps/chosen": -4043.174560546875, |
| "logps/rejected": -4138.63037109375, |
| "loss": 0.0004, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 4.332669258117676, |
| "rewards/margins": 14.487439155578613, |
| "rewards/rejected": -10.154768943786621, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.2743402415386909, |
| "grad_norm": 0.011768614873290062, |
| "learning_rate": 4.851334044236871e-05, |
| "logits/chosen": -57.09410858154297, |
| "logits/rejected": -58.455528259277344, |
| "logps/chosen": -3805.43994140625, |
| "logps/rejected": -3812.210205078125, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.533733367919922, |
| "rewards/margins": 15.815759658813477, |
| "rewards/rejected": -6.282026767730713, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.28030415983301027, |
| "grad_norm": 0.23742778599262238, |
| "learning_rate": 4.84302544091077e-05, |
| "logits/chosen": -57.45296096801758, |
| "logits/rejected": -58.879905700683594, |
| "logps/chosen": -3779.80419921875, |
| "logps/rejected": -3724.149169921875, |
| "loss": 0.0005, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.725826263427734, |
| "rewards/margins": 13.637672424316406, |
| "rewards/rejected": -3.91184663772583, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.28626807812732963, |
| "grad_norm": 0.25321099162101746, |
| "learning_rate": 4.8344984448034555e-05, |
| "logits/chosen": -57.193397521972656, |
| "logits/rejected": -58.29435348510742, |
| "logps/chosen": -3555.61181640625, |
| "logps/rejected": -3608.22265625, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.781957626342773, |
| "rewards/margins": 17.517807006835938, |
| "rewards/rejected": -6.7358503341674805, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.29223199642164904, |
| "grad_norm": 0.0003638965426944196, |
| "learning_rate": 4.825753850713977e-05, |
| "logits/chosen": -55.068214416503906, |
| "logits/rejected": -55.34920120239258, |
| "logps/chosen": -3720.23486328125, |
| "logps/rejected": -3710.431640625, |
| "loss": 0.0142, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 11.105955123901367, |
| "rewards/margins": 15.404626846313477, |
| "rewards/rejected": -4.298670768737793, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.2981959147159684, |
| "grad_norm": 0.5876765251159668, |
| "learning_rate": 4.816792473723633e-05, |
| "logits/chosen": -54.507286071777344, |
| "logits/rejected": -58.03179168701172, |
| "logps/chosen": -3657.91650390625, |
| "logps/rejected": -3473.89013671875, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 12.856443405151367, |
| "rewards/margins": 16.580219268798828, |
| "rewards/rejected": -3.7237751483917236, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.30415983301028776, |
| "grad_norm": 1.0686131872716942e-06, |
| "learning_rate": 4.807615149120004e-05, |
| "logits/chosen": -53.92388916015625, |
| "logits/rejected": -56.692657470703125, |
| "logps/chosen": -3720.27685546875, |
| "logps/rejected": -3769.602783203125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 14.57264232635498, |
| "rewards/margins": 18.110240936279297, |
| "rewards/rejected": -3.537597179412842, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.3101237513046071, |
| "grad_norm": 6.154350558063015e-05, |
| "learning_rate": 4.7982227323190845e-05, |
| "logits/chosen": -52.49439239501953, |
| "logits/rejected": -55.725608825683594, |
| "logps/chosen": -3611.25341796875, |
| "logps/rejected": -3967.747314453125, |
| "loss": 0.001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 17.996103286743164, |
| "rewards/margins": 17.36039161682129, |
| "rewards/rejected": 0.6357126235961914, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.3160876695989265, |
| "grad_norm": 1.5112824769403232e-07, |
| "learning_rate": 4.788616098785561e-05, |
| "logits/chosen": -51.7737922668457, |
| "logits/rejected": -54.860435485839844, |
| "logps/chosen": -3689.40869140625, |
| "logps/rejected": -3673.685546875, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 19.106679916381836, |
| "rewards/margins": 18.57061767578125, |
| "rewards/rejected": 0.5360631942749023, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.32205158789324584, |
| "grad_norm": 6.17673921585083, |
| "learning_rate": 4.778796143951202e-05, |
| "logits/chosen": -53.8176155090332, |
| "logits/rejected": -57.342063903808594, |
| "logps/chosen": -3829.150390625, |
| "logps/rejected": -4027.96142578125, |
| "loss": 0.0041, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 13.951896667480469, |
| "rewards/margins": 18.080907821655273, |
| "rewards/rejected": -4.129012107849121, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.32801550618756525, |
| "grad_norm": 11.827872276306152, |
| "learning_rate": 4.768763783131397e-05, |
| "logits/chosen": -53.82781219482422, |
| "logits/rejected": -53.793556213378906, |
| "logps/chosen": -4161.9482421875, |
| "logps/rejected": -4243.11376953125, |
| "loss": 0.0095, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 20.491607666015625, |
| "rewards/margins": 15.793841361999512, |
| "rewards/rejected": 4.697766304016113, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.3339794244818846, |
| "grad_norm": 0.006199230439960957, |
| "learning_rate": 4.7585199514398444e-05, |
| "logits/chosen": -54.75727462768555, |
| "logits/rejected": -56.78379440307617, |
| "logps/chosen": -3925.2890625, |
| "logps/rejected": -4292.37109375, |
| "loss": 0.2788, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 25.08880043029785, |
| "rewards/margins": 15.867774963378906, |
| "rewards/rejected": 9.221026420593262, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.33994334277620397, |
| "grad_norm": 0.0003209487476851791, |
| "learning_rate": 4.7480656037013836e-05, |
| "logits/chosen": -53.4384651184082, |
| "logits/rejected": -55.634620666503906, |
| "logps/chosen": -3506.94384765625, |
| "logps/rejected": -3504.106201171875, |
| "loss": 0.0007, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 12.662946701049805, |
| "rewards/margins": 15.725804328918457, |
| "rewards/rejected": -3.062856435775757, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.34590726107052333, |
| "grad_norm": 7.712010119576007e-05, |
| "learning_rate": 4.7374017143630026e-05, |
| "logits/chosen": -57.10976028442383, |
| "logits/rejected": -59.99627685546875, |
| "logps/chosen": -4153.18603515625, |
| "logps/rejected": -4331.6298828125, |
| "loss": 0.0146, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 4.0889763832092285, |
| "rewards/margins": 18.375919342041016, |
| "rewards/rejected": -14.286943435668945, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.3518711793648427, |
| "grad_norm": 9.873609815258533e-05, |
| "learning_rate": 4.726529277403001e-05, |
| "logits/chosen": -57.93046951293945, |
| "logits/rejected": -58.8515625, |
| "logps/chosen": -4072.68115234375, |
| "logps/rejected": -4284.86962890625, |
| "loss": 0.0036, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.040389537811279, |
| "rewards/margins": 16.860654830932617, |
| "rewards/rejected": -10.820265769958496, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.35783509765916205, |
| "grad_norm": 2.0305640646256506e-05, |
| "learning_rate": 4.7154493062383534e-05, |
| "logits/chosen": -59.621116638183594, |
| "logits/rejected": -61.127525329589844, |
| "logps/chosen": -3728.64697265625, |
| "logps/rejected": -4261.62939453125, |
| "loss": 0.0046, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 4.971760272979736, |
| "rewards/margins": 19.21591567993164, |
| "rewards/rejected": -14.244155883789062, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.36379901595348146, |
| "grad_norm": 7.320449367398396e-05, |
| "learning_rate": 4.704162833630237e-05, |
| "logits/chosen": -58.1140251159668, |
| "logits/rejected": -60.18278884887695, |
| "logps/chosen": -3717.621826171875, |
| "logps/rejected": -3631.44482421875, |
| "loss": 0.0013, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.6981580257415771, |
| "rewards/margins": 17.43337631225586, |
| "rewards/rejected": -15.735217094421387, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.3697629342478008, |
| "grad_norm": 4.593440532684326, |
| "learning_rate": 4.692670911587778e-05, |
| "logits/chosen": -59.49884796142578, |
| "logits/rejected": -63.38677978515625, |
| "logps/chosen": -3749.25048828125, |
| "logps/rejected": -4039.53564453125, |
| "loss": 0.0014, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.481489658355713, |
| "rewards/margins": 20.123058319091797, |
| "rewards/rejected": -17.64156723022461, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.3757268525421202, |
| "grad_norm": 8.068655006354675e-05, |
| "learning_rate": 4.680974611269987e-05, |
| "logits/chosen": -55.29735565185547, |
| "logits/rejected": -56.667335510253906, |
| "logps/chosen": -4161.16796875, |
| "logps/rejected": -4411.65234375, |
| "loss": 0.1013, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 7.400763034820557, |
| "rewards/margins": 19.986141204833984, |
| "rewards/rejected": -12.585378646850586, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.38169077083643954, |
| "grad_norm": 0.007735874503850937, |
| "learning_rate": 4.669075022885923e-05, |
| "logits/chosen": -59.70928192138672, |
| "logits/rejected": -61.80042266845703, |
| "logps/chosen": -3695.89306640625, |
| "logps/rejected": -3571.295654296875, |
| "loss": 0.0029, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.748845338821411, |
| "rewards/margins": 17.76656150817871, |
| "rewards/rejected": -15.017715454101562, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.3876546891307589, |
| "grad_norm": 0.0010597106302157044, |
| "learning_rate": 4.6569732555930664e-05, |
| "logits/chosen": -60.23158645629883, |
| "logits/rejected": -62.561614990234375, |
| "logps/chosen": -3549.76171875, |
| "logps/rejected": -3746.29443359375, |
| "loss": 0.0015, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5340225100517273, |
| "rewards/margins": 19.62533187866211, |
| "rewards/rejected": -20.159353256225586, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.39361860742507826, |
| "grad_norm": 6.7051691985398065e-06, |
| "learning_rate": 4.6446704373939474e-05, |
| "logits/chosen": -59.58567428588867, |
| "logits/rejected": -64.33174133300781, |
| "logps/chosen": -3639.188720703125, |
| "logps/rejected": -4331.96484375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.348783016204834, |
| "rewards/margins": 21.183481216430664, |
| "rewards/rejected": -15.834698677062988, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.3995825257193976, |
| "grad_norm": 0.0003712655452545732, |
| "learning_rate": 4.632167715030992e-05, |
| "logits/chosen": -58.96162796020508, |
| "logits/rejected": -60.83256912231445, |
| "logps/chosen": -3720.49169921875, |
| "logps/rejected": -3933.169189453125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.91277813911438, |
| "rewards/margins": 18.707401275634766, |
| "rewards/rejected": -14.794624328613281, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.40554644401371703, |
| "grad_norm": 0.000252110738074407, |
| "learning_rate": 4.619466253879643e-05, |
| "logits/chosen": -58.954345703125, |
| "logits/rejected": -61.925331115722656, |
| "logps/chosen": -3840.400390625, |
| "logps/rejected": -3943.953857421875, |
| "loss": 1.1078, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 10.758131980895996, |
| "rewards/margins": 18.48508644104004, |
| "rewards/rejected": -7.72695255279541, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.4115103623080364, |
| "grad_norm": 1.9803482587121835e-07, |
| "learning_rate": 4.606567237839733e-05, |
| "logits/chosen": -58.53533172607422, |
| "logits/rejected": -62.5938720703125, |
| "logps/chosen": -3771.753173828125, |
| "logps/rejected": -3718.02587890625, |
| "loss": 0.0131, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 16.095252990722656, |
| "rewards/margins": 18.99433708190918, |
| "rewards/rejected": -2.899085760116577, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.41747428060235575, |
| "grad_norm": 0.0626341700553894, |
| "learning_rate": 4.593471869225132e-05, |
| "logits/chosen": -54.60516357421875, |
| "logits/rejected": -55.525299072265625, |
| "logps/chosen": -3515.046875, |
| "logps/rejected": -3357.25390625, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 18.759082794189453, |
| "rewards/margins": 17.699445724487305, |
| "rewards/rejected": 1.0596368312835693, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.4234381988966751, |
| "grad_norm": 4.832664126297459e-05, |
| "learning_rate": 4.580181368651683e-05, |
| "logits/chosen": -54.49121856689453, |
| "logits/rejected": -55.52031326293945, |
| "logps/chosen": -3684.757080078125, |
| "logps/rejected": -3753.23828125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 20.624637603759766, |
| "rewards/margins": 19.6689395904541, |
| "rewards/rejected": 0.955697238445282, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.42940211719099447, |
| "grad_norm": 5.976962142995035e-07, |
| "learning_rate": 4.5666969749234276e-05, |
| "logits/chosen": -54.036834716796875, |
| "logits/rejected": -56.30643844604492, |
| "logps/chosen": -3887.684814453125, |
| "logps/rejected": -4103.453125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 19.976490020751953, |
| "rewards/margins": 20.665203094482422, |
| "rewards/rejected": -0.6887091398239136, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.43536603548531383, |
| "grad_norm": 2.6839097699848935e-05, |
| "learning_rate": 4.553019944917135e-05, |
| "logits/chosen": -53.194984436035156, |
| "logits/rejected": -55.72241973876953, |
| "logps/chosen": -3147.961669921875, |
| "logps/rejected": -3584.89990234375, |
| "loss": 0.0005, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 19.091827392578125, |
| "rewards/margins": 18.645496368408203, |
| "rewards/rejected": 0.44633132219314575, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.44132995377963324, |
| "grad_norm": 5.41134322702419e-05, |
| "learning_rate": 4.539151553465154e-05, |
| "logits/chosen": -57.11003875732422, |
| "logits/rejected": -57.11518096923828, |
| "logps/chosen": -3673.337158203125, |
| "logps/rejected": -3887.12646484375, |
| "loss": 0.0199, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 16.159154891967773, |
| "rewards/margins": 20.49015998840332, |
| "rewards/rejected": -4.3310017585754395, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.4472938720739526, |
| "grad_norm": 0.000161544798174873, |
| "learning_rate": 4.52509309323658e-05, |
| "logits/chosen": -56.69603729248047, |
| "logits/rejected": -59.89275360107422, |
| "logps/chosen": -3596.99462890625, |
| "logps/rejected": -4027.516357421875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 13.372100830078125, |
| "rewards/margins": 25.469730377197266, |
| "rewards/rejected": -12.09763240814209, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.45325779036827196, |
| "grad_norm": 1.5759195548525895e-06, |
| "learning_rate": 4.510845874616769e-05, |
| "logits/chosen": -61.1575813293457, |
| "logits/rejected": -63.29572296142578, |
| "logps/chosen": -4064.885498046875, |
| "logps/rejected": -4054.22216796875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 12.724843978881836, |
| "rewards/margins": 20.249258041381836, |
| "rewards/rejected": -7.524415016174316, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.4592217086625913, |
| "grad_norm": 0.00013319715799298137, |
| "learning_rate": 4.4964112255852e-05, |
| "logits/chosen": -55.76922607421875, |
| "logits/rejected": -58.15706253051758, |
| "logps/chosen": -3776.48046875, |
| "logps/rejected": -3697.975341796875, |
| "loss": 0.1818, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 12.983065605163574, |
| "rewards/margins": 20.102405548095703, |
| "rewards/rejected": -7.119339942932129, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.4651856269569107, |
| "grad_norm": 0.03267952799797058, |
| "learning_rate": 4.481790491591687e-05, |
| "logits/chosen": -57.81508255004883, |
| "logits/rejected": -60.3544807434082, |
| "logps/chosen": -3629.84765625, |
| "logps/rejected": -3470.53662109375, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 13.1411714553833, |
| "rewards/margins": 16.54482650756836, |
| "rewards/rejected": -3.4036548137664795, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.47114954525123004, |
| "grad_norm": 0.003748134011402726, |
| "learning_rate": 4.466985035430977e-05, |
| "logits/chosen": -58.23984909057617, |
| "logits/rejected": -60.508758544921875, |
| "logps/chosen": -4077.05029296875, |
| "logps/rejected": -4333.125, |
| "loss": 0.004, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 15.593130111694336, |
| "rewards/margins": 17.254772186279297, |
| "rewards/rejected": -1.6616401672363281, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.4771134635455494, |
| "grad_norm": 0.0014063880080357194, |
| "learning_rate": 4.4519962371157196e-05, |
| "logits/chosen": -58.69189453125, |
| "logits/rejected": -59.80534744262695, |
| "logps/chosen": -3442.610595703125, |
| "logps/rejected": -3336.97119140625, |
| "loss": 0.0004, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.450828552246094, |
| "rewards/margins": 16.80379867553711, |
| "rewards/rejected": -6.352969646453857, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.4830773818398688, |
| "grad_norm": 4.745288606500253e-05, |
| "learning_rate": 4.4368254937478394e-05, |
| "logits/chosen": -57.631553649902344, |
| "logits/rejected": -62.973785400390625, |
| "logps/chosen": -4004.979736328125, |
| "logps/rejected": -4168.11767578125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 8.3682861328125, |
| "rewards/margins": 22.90373420715332, |
| "rewards/rejected": -14.535449028015137, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.4890413001341882, |
| "grad_norm": 0.0005291652050800622, |
| "learning_rate": 4.4214742193883094e-05, |
| "logits/chosen": -60.228851318359375, |
| "logits/rejected": -61.08269500732422, |
| "logps/chosen": -4001.78173828125, |
| "logps/rejected": -4179.3408203125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.373010635375977, |
| "rewards/margins": 20.33767318725586, |
| "rewards/rejected": -10.964665412902832, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.49500521842850753, |
| "grad_norm": 0.00031022998155094683, |
| "learning_rate": 4.40594384492535e-05, |
| "logits/chosen": -57.28828048706055, |
| "logits/rejected": -59.451202392578125, |
| "logps/chosen": -3300.813232421875, |
| "logps/rejected": -3466.85205078125, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.398109436035156, |
| "rewards/margins": 20.157407760620117, |
| "rewards/rejected": -10.759299278259277, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.500969136722827, |
| "grad_norm": 6.490218311228091e-06, |
| "learning_rate": 4.390235817941054e-05, |
| "logits/chosen": -59.27235794067383, |
| "logits/rejected": -61.06171798706055, |
| "logps/chosen": -4139.15234375, |
| "logps/rejected": -3869.400390625, |
| "loss": 0.0026, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.081242561340332, |
| "rewards/margins": 19.477825164794922, |
| "rewards/rejected": -10.396584510803223, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.5069330550171463, |
| "grad_norm": 2.243270955659682e-06, |
| "learning_rate": 4.37435160257646e-05, |
| "logits/chosen": -59.277854919433594, |
| "logits/rejected": -61.45380783081055, |
| "logps/chosen": -3748.130126953125, |
| "logps/rejected": -3891.33349609375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 8.908781051635742, |
| "rewards/margins": 19.269939422607422, |
| "rewards/rejected": -10.36115837097168, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.5128969733114657, |
| "grad_norm": 0.005396808031946421, |
| "learning_rate": 4.358292679395077e-05, |
| "logits/chosen": -61.35847091674805, |
| "logits/rejected": -60.853233337402344, |
| "logps/chosen": -4117.12109375, |
| "logps/rejected": -4071.62109375, |
| "loss": 0.0004, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.670493125915527, |
| "rewards/margins": 17.932058334350586, |
| "rewards/rejected": -7.261567115783691, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.518860891605785, |
| "grad_norm": 4.6453762479359284e-05, |
| "learning_rate": 4.342060545244886e-05, |
| "logits/chosen": -59.957664489746094, |
| "logits/rejected": -59.93918991088867, |
| "logps/chosen": -4070.168701171875, |
| "logps/rejected": -4095.42236328125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 15.734319686889648, |
| "rewards/margins": 20.455835342407227, |
| "rewards/rejected": -4.721514701843262, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.5248248099001044, |
| "grad_norm": 2.395652813902416e-07, |
| "learning_rate": 4.3256567131188136e-05, |
| "logits/chosen": -55.78017044067383, |
| "logits/rejected": -59.07427978515625, |
| "logps/chosen": -3826.934326171875, |
| "logps/rejected": -3976.23486328125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 16.891937255859375, |
| "rewards/margins": 23.67508888244629, |
| "rewards/rejected": -6.783150672912598, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.5307887281944237, |
| "grad_norm": 2.516942004149314e-06, |
| "learning_rate": 4.3090827120137114e-05, |
| "logits/chosen": -55.632896423339844, |
| "logits/rejected": -59.044334411621094, |
| "logps/chosen": -3536.372314453125, |
| "logps/rejected": -3534.555419921875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 18.21674919128418, |
| "rewards/margins": 20.339038848876953, |
| "rewards/rejected": -2.1222903728485107, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.5367526464887431, |
| "grad_norm": 2.6818201149581e-05, |
| "learning_rate": 4.292340086787834e-05, |
| "logits/chosen": -57.886573791503906, |
| "logits/rejected": -58.13624954223633, |
| "logps/chosen": -3513.440673828125, |
| "logps/rejected": -3463.99169921875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 15.707967758178711, |
| "rewards/margins": 19.617687225341797, |
| "rewards/rejected": -3.9097201824188232, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.5427165647830625, |
| "grad_norm": 9.664769459050149e-05, |
| "learning_rate": 4.2754303980168495e-05, |
| "logits/chosen": -59.3498420715332, |
| "logits/rejected": -61.16081619262695, |
| "logps/chosen": -4103.5751953125, |
| "logps/rejected": -4111.462890625, |
| "loss": 0.1983, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 7.029057502746582, |
| "rewards/margins": 19.05350112915039, |
| "rewards/rejected": -12.024443626403809, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.5486804830773818, |
| "grad_norm": 0.0024496586993336678, |
| "learning_rate": 4.2583552218483725e-05, |
| "logits/chosen": -58.522064208984375, |
| "logits/rejected": -61.4221305847168, |
| "logps/chosen": -4057.762451171875, |
| "logps/rejected": -4270.6669921875, |
| "loss": 0.0004, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.0752465724945068, |
| "rewards/margins": 21.978342056274414, |
| "rewards/rejected": -20.903093338012695, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.5546444013717012, |
| "grad_norm": 9.883645361696836e-06, |
| "learning_rate": 4.241116149855053e-05, |
| "logits/chosen": -62.64072799682617, |
| "logits/rejected": -65.83187103271484, |
| "logps/chosen": -4190.4921875, |
| "logps/rejected": -4857.25439453125, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.829843044281006, |
| "rewards/margins": 26.19070816040039, |
| "rewards/rejected": -33.02054977416992, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.5606083196660205, |
| "grad_norm": 0.00028690064209513366, |
| "learning_rate": 4.2237147888862305e-05, |
| "logits/chosen": -63.4923210144043, |
| "logits/rejected": -66.00108337402344, |
| "logps/chosen": -3985.04833984375, |
| "logps/rejected": -4251.2177734375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -9.499919891357422, |
| "rewards/margins": 20.861995697021484, |
| "rewards/rejected": -30.361913681030273, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.56657223796034, |
| "grad_norm": 0.00011731364793376997, |
| "learning_rate": 4.206152760918154e-05, |
| "logits/chosen": -62.154945373535156, |
| "logits/rejected": -65.93985748291016, |
| "logps/chosen": -3877.466796875, |
| "logps/rejected": -3978.805419921875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -9.469512939453125, |
| "rewards/margins": 22.071298599243164, |
| "rewards/rejected": -31.54081153869629, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.5725361562546593, |
| "grad_norm": 8.252065163105726e-05, |
| "learning_rate": 4.188431702902803e-05, |
| "logits/chosen": -64.38203430175781, |
| "logits/rejected": -67.57643127441406, |
| "logps/chosen": -4045.811279296875, |
| "logps/rejected": -4326.49267578125, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -8.671667098999023, |
| "rewards/margins": 22.858341217041016, |
| "rewards/rejected": -31.53000831604004, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.5785000745489787, |
| "grad_norm": 4.934684625368391e-07, |
| "learning_rate": 4.1705532666153036e-05, |
| "logits/chosen": -61.06531524658203, |
| "logits/rejected": -64.74785614013672, |
| "logps/chosen": -3762.716064453125, |
| "logps/rejected": -4029.44775390625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.9471113681793213, |
| "rewards/margins": 22.45229148864746, |
| "rewards/rejected": -20.50518226623535, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.5844639928432981, |
| "grad_norm": 0.006156011950224638, |
| "learning_rate": 4.152519118499971e-05, |
| "logits/chosen": -62.082862854003906, |
| "logits/rejected": -65.42506408691406, |
| "logps/chosen": -3859.382080078125, |
| "logps/rejected": -4022.57861328125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.43494659662246704, |
| "rewards/margins": 22.814266204833984, |
| "rewards/rejected": -22.379318237304688, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.5904279111376174, |
| "grad_norm": 3.552740110990271e-07, |
| "learning_rate": 4.134330939514979e-05, |
| "logits/chosen": -61.07642364501953, |
| "logits/rejected": -64.37804412841797, |
| "logps/chosen": -4356.12841796875, |
| "logps/rejected": -4619.02490234375, |
| "loss": 1.3263, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": -3.0648064613342285, |
| "rewards/margins": 20.497941970825195, |
| "rewards/rejected": -23.562747955322266, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.5963918294319368, |
| "grad_norm": 1.0799766414493206e-07, |
| "learning_rate": 4.1159904249756755e-05, |
| "logits/chosen": -60.018157958984375, |
| "logits/rejected": -63.62800216674805, |
| "logps/chosen": -4144.43505859375, |
| "logps/rejected": -4186.046875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.4197044372558594, |
| "rewards/margins": 20.778461456298828, |
| "rewards/rejected": -18.35875701904297, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.6023557477262561, |
| "grad_norm": 8.552977305953391e-06, |
| "learning_rate": 4.097499284396567e-05, |
| "logits/chosen": -61.17185592651367, |
| "logits/rejected": -65.2453384399414, |
| "logps/chosen": -3919.401611328125, |
| "logps/rejected": -3826.82861328125, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.23002290725708, |
| "rewards/margins": 21.679697036743164, |
| "rewards/rejected": -16.449674606323242, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.6083196660205755, |
| "grad_norm": 1.2926037015859038e-05, |
| "learning_rate": 4.0788592413319724e-05, |
| "logits/chosen": -57.3408317565918, |
| "logits/rejected": -57.7392578125, |
| "logps/chosen": -4130.8193359375, |
| "logps/rejected": -3755.291748046875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 7.651645660400391, |
| "rewards/margins": 19.993789672851562, |
| "rewards/rejected": -12.342143058776855, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.6142835843148949, |
| "grad_norm": 1.5771300923006493e-06, |
| "learning_rate": 4.060072033215373e-05, |
| "logits/chosen": -55.617210388183594, |
| "logits/rejected": -58.800628662109375, |
| "logps/chosen": -4015.65771484375, |
| "logps/rejected": -4279.2431640625, |
| "loss": 0.0821, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 7.365814208984375, |
| "rewards/margins": 22.02432632446289, |
| "rewards/rejected": -14.6585111618042, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.6202475026092142, |
| "grad_norm": 5.314045483828522e-05, |
| "learning_rate": 4.0411394111974646e-05, |
| "logits/chosen": -57.569549560546875, |
| "logits/rejected": -60.29054641723633, |
| "logps/chosen": -3956.905517578125, |
| "logps/rejected": -4089.600341796875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.769119262695312, |
| "rewards/margins": 19.566356658935547, |
| "rewards/rejected": -8.797237396240234, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.6262114209035337, |
| "grad_norm": 9.0191870185663e-06, |
| "learning_rate": 4.022063139982934e-05, |
| "logits/chosen": -55.201271057128906, |
| "logits/rejected": -58.512351989746094, |
| "logps/chosen": -3667.98486328125, |
| "logps/rejected": -3769.97998046875, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 11.191717147827148, |
| "rewards/margins": 19.362136840820312, |
| "rewards/rejected": -8.170419692993164, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.632175339197853, |
| "grad_norm": 2.962595999633777e-06, |
| "learning_rate": 4.0028449976659724e-05, |
| "logits/chosen": -57.57762908935547, |
| "logits/rejected": -61.270423889160156, |
| "logps/chosen": -3651.904296875, |
| "logps/rejected": -3825.788330078125, |
| "loss": 0.0782, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 12.59385871887207, |
| "rewards/margins": 21.37427520751953, |
| "rewards/rejected": -8.780416488647461, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.6381392574921724, |
| "grad_norm": 3.167168927120656e-07, |
| "learning_rate": 3.983486775564539e-05, |
| "logits/chosen": -52.46897506713867, |
| "logits/rejected": -56.435447692871094, |
| "logps/chosen": -3674.512451171875, |
| "logps/rejected": -4028.725341796875, |
| "loss": 0.0197, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 10.90526294708252, |
| "rewards/margins": 22.505237579345703, |
| "rewards/rejected": -11.599977493286133, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.6441031757864917, |
| "grad_norm": 7.120500231394544e-05, |
| "learning_rate": 3.963990278053392e-05, |
| "logits/chosen": -56.88371658325195, |
| "logits/rejected": -58.082679748535156, |
| "logps/chosen": -3925.76953125, |
| "logps/rejected": -4151.24560546875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 13.153375625610352, |
| "rewards/margins": 23.492019653320312, |
| "rewards/rejected": -10.33864688873291, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.6500670940808111, |
| "grad_norm": 3.851781560371137e-09, |
| "learning_rate": 3.944357322395905e-05, |
| "logits/chosen": -58.075218200683594, |
| "logits/rejected": -61.56608963012695, |
| "logps/chosen": -4175.98583984375, |
| "logps/rejected": -4781.39599609375, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.212366104125977, |
| "rewards/margins": 27.55126953125, |
| "rewards/rejected": -17.338903427124023, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.6560310123751305, |
| "grad_norm": 5.771942568344457e-08, |
| "learning_rate": 3.9245897385746775e-05, |
| "logits/chosen": -56.85009765625, |
| "logits/rejected": -59.448028564453125, |
| "logps/chosen": -3711.79296875, |
| "logps/rejected": -3940.455810546875, |
| "loss": 0.0027, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 12.879262924194336, |
| "rewards/margins": 24.20357322692871, |
| "rewards/rejected": -11.324310302734375, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.6619949306694498, |
| "grad_norm": 2.9022641683695838e-05, |
| "learning_rate": 3.9046893691209664e-05, |
| "logits/chosen": -53.4720458984375, |
| "logits/rejected": -57.07532501220703, |
| "logps/chosen": -3489.55224609375, |
| "logps/rejected": -3556.66845703125, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 13.83867359161377, |
| "rewards/margins": 22.73272705078125, |
| "rewards/rejected": -8.894055366516113, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.6679588489637692, |
| "grad_norm": 3.5439442491735917e-09, |
| "learning_rate": 3.884658068942941e-05, |
| "logits/chosen": -51.687355041503906, |
| "logits/rejected": -57.01649856567383, |
| "logps/chosen": -3604.44384765625, |
| "logps/rejected": -3905.239501953125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 16.09690284729004, |
| "rewards/margins": 26.312124252319336, |
| "rewards/rejected": -10.21522045135498, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.6739227672580885, |
| "grad_norm": 2.719633585002157e-08, |
| "learning_rate": 3.8644977051527885e-05, |
| "logits/chosen": -54.46210861206055, |
| "logits/rejected": -57.665122985839844, |
| "logps/chosen": -3400.2109375, |
| "logps/rejected": -3510.215576171875, |
| "loss": 0.0207, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 17.349239349365234, |
| "rewards/margins": 22.681480407714844, |
| "rewards/rejected": -5.332241058349609, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.6798866855524079, |
| "grad_norm": 2.1326864043658134e-06, |
| "learning_rate": 3.844210156892683e-05, |
| "logits/chosen": -56.72893142700195, |
| "logits/rejected": -58.62841796875, |
| "logps/chosen": -3941.629638671875, |
| "logps/rejected": -3959.418701171875, |
| "loss": 0.0106, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 11.112319946289062, |
| "rewards/margins": 23.076122283935547, |
| "rewards/rejected": -11.96380615234375, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.6858506038467272, |
| "grad_norm": 1.068567740958315e-07, |
| "learning_rate": 3.823797315159629e-05, |
| "logits/chosen": -55.89997482299805, |
| "logits/rejected": -58.0466423034668, |
| "logps/chosen": -4139.759765625, |
| "logps/rejected": -3862.365966796875, |
| "loss": 0.0232, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 10.52987289428711, |
| "rewards/margins": 23.36825180053711, |
| "rewards/rejected": -12.83838176727295, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.6918145221410467, |
| "grad_norm": 1.9215246993553592e-07, |
| "learning_rate": 3.803261082629198e-05, |
| "logits/chosen": -57.30939865112305, |
| "logits/rejected": -60.035255432128906, |
| "logps/chosen": -3841.99853515625, |
| "logps/rejected": -3897.645263671875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.77658462524414, |
| "rewards/margins": 23.096725463867188, |
| "rewards/rejected": -12.320140838623047, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.6977784404353661, |
| "grad_norm": 1.6344721416317043e-06, |
| "learning_rate": 3.782603373478194e-05, |
| "logits/chosen": -59.83994674682617, |
| "logits/rejected": -62.819435119628906, |
| "logps/chosen": -3778.887939453125, |
| "logps/rejected": -3805.174560546875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 7.7517805099487305, |
| "rewards/margins": 21.68671417236328, |
| "rewards/rejected": -13.934931755065918, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.7037423587296854, |
| "grad_norm": 0.0002575635153334588, |
| "learning_rate": 3.761826113206216e-05, |
| "logits/chosen": -54.88861846923828, |
| "logits/rejected": -58.307289123535156, |
| "logps/chosen": -3845.460205078125, |
| "logps/rejected": -3828.728515625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 8.72022819519043, |
| "rewards/margins": 23.8989200592041, |
| "rewards/rejected": -15.178689956665039, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.7097062770240048, |
| "grad_norm": 8.875089406501502e-05, |
| "learning_rate": 3.740931238456195e-05, |
| "logits/chosen": -56.36833572387695, |
| "logits/rejected": -59.8969612121582, |
| "logps/chosen": -3818.005126953125, |
| "logps/rejected": -3839.862060546875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 8.653704643249512, |
| "rewards/margins": 23.86166763305664, |
| "rewards/rejected": -15.207963943481445, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.7156701953183241, |
| "grad_norm": 1.4342495887831319e-05, |
| "learning_rate": 3.7199206968338776e-05, |
| "logits/chosen": -57.47324752807617, |
| "logits/rejected": -61.022796630859375, |
| "logps/chosen": -3948.418701171875, |
| "logps/rejected": -4151.45849609375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.350122451782227, |
| "rewards/margins": 23.93897247314453, |
| "rewards/rejected": -13.588849067687988, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.7216341136126435, |
| "grad_norm": 0.13247327506542206, |
| "learning_rate": 3.6987964467262866e-05, |
| "logits/chosen": -57.539756774902344, |
| "logits/rejected": -60.251495361328125, |
| "logps/chosen": -3663.866455078125, |
| "logps/rejected": -3746.822265625, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 8.292342185974121, |
| "rewards/margins": 21.296642303466797, |
| "rewards/rejected": -13.004300117492676, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.7275980319069629, |
| "grad_norm": 1.1016503776772879e-05, |
| "learning_rate": 3.6775604571191835e-05, |
| "logits/chosen": -57.985084533691406, |
| "logits/rejected": -62.3299446105957, |
| "logps/chosen": -3958.30029296875, |
| "logps/rejected": -4056.701171875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 8.709458351135254, |
| "rewards/margins": 25.354206085205078, |
| "rewards/rejected": -16.644746780395508, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.7335619502012822, |
| "grad_norm": 5.239522238298377e-07, |
| "learning_rate": 3.6562147074135395e-05, |
| "logits/chosen": -55.77289581298828, |
| "logits/rejected": -56.620445251464844, |
| "logps/chosen": -3935.78173828125, |
| "logps/rejected": -4204.76025390625, |
| "loss": 0.014, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 12.306615829467773, |
| "rewards/margins": 23.17990493774414, |
| "rewards/rejected": -10.873285293579102, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.7395258684956016, |
| "grad_norm": 0.005283840466290712, |
| "learning_rate": 3.6347611872410347e-05, |
| "logits/chosen": -56.01280975341797, |
| "logits/rejected": -59.32996368408203, |
| "logps/chosen": -3594.85009765625, |
| "logps/rejected": -3652.62646484375, |
| "loss": 0.0027, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 11.890737533569336, |
| "rewards/margins": 21.618831634521484, |
| "rewards/rejected": -9.728094100952148, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.745489786789921, |
| "grad_norm": 4.326713315094821e-05, |
| "learning_rate": 3.6132018962786066e-05, |
| "logits/chosen": -55.68181610107422, |
| "logits/rejected": -60.5903434753418, |
| "logps/chosen": -3649.29248046875, |
| "logps/rejected": -4155.4248046875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 14.527814865112305, |
| "rewards/margins": 24.51072883605957, |
| "rewards/rejected": -9.982913970947266, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.7514537050842404, |
| "grad_norm": 4.07174782779407e-09, |
| "learning_rate": 3.591538844062058e-05, |
| "logits/chosen": -56.679443359375, |
| "logits/rejected": -58.407135009765625, |
| "logps/chosen": -3861.89453125, |
| "logps/rejected": -3911.80078125, |
| "loss": 0.182, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 14.151501655578613, |
| "rewards/margins": 23.692068099975586, |
| "rewards/rejected": -9.540567398071289, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.7574176233785597, |
| "grad_norm": 1.2413269701028184e-07, |
| "learning_rate": 3.5697740497987554e-05, |
| "logits/chosen": -53.7273063659668, |
| "logits/rejected": -59.340187072753906, |
| "logps/chosen": -3629.626220703125, |
| "logps/rejected": -3897.79833984375, |
| "loss": 0.0219, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 11.900304794311523, |
| "rewards/margins": 24.943315505981445, |
| "rewards/rejected": -13.043011665344238, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.7633815416728791, |
| "grad_norm": 4.0087169850266946e-07, |
| "learning_rate": 3.5479095421794087e-05, |
| "logits/chosen": -58.437957763671875, |
| "logits/rejected": -59.74829864501953, |
| "logps/chosen": -4079.52685546875, |
| "logps/rejected": -4265.56298828125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 8.92692756652832, |
| "rewards/margins": 24.08592414855957, |
| "rewards/rejected": -15.158998489379883, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.7693454599671985, |
| "grad_norm": 5.956236464044196e-07, |
| "learning_rate": 3.525947359188988e-05, |
| "logits/chosen": -56.96044158935547, |
| "logits/rejected": -60.473365783691406, |
| "logps/chosen": -3507.78662109375, |
| "logps/rejected": -3547.66455078125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 8.140953063964844, |
| "rewards/margins": 25.788040161132812, |
| "rewards/rejected": -17.6470890045166, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.7753093782615178, |
| "grad_norm": 5.90286333590484e-07, |
| "learning_rate": 3.503889547916757e-05, |
| "logits/chosen": -56.5598258972168, |
| "logits/rejected": -60.83135223388672, |
| "logps/chosen": -3679.897216796875, |
| "logps/rejected": -4072.563720703125, |
| "loss": 0.0251, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 8.379316329956055, |
| "rewards/margins": 24.321413040161133, |
| "rewards/rejected": -15.942098617553711, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.7812732965558372, |
| "grad_norm": 0.00029456091579049826, |
| "learning_rate": 3.4817381643654656e-05, |
| "logits/chosen": -54.76763153076172, |
| "logits/rejected": -57.302955627441406, |
| "logps/chosen": -3611.793701171875, |
| "logps/rejected": -3881.207763671875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 11.57978343963623, |
| "rewards/margins": 24.082733154296875, |
| "rewards/rejected": -12.502950668334961, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.7872372148501565, |
| "grad_norm": 2.6790543117272136e-08, |
| "learning_rate": 3.4594952732597114e-05, |
| "logits/chosen": -57.267723083496094, |
| "logits/rejected": -61.67658233642578, |
| "logps/chosen": -3998.71630859375, |
| "logps/rejected": -4348.85205078125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 15.019363403320312, |
| "rewards/margins": 25.229736328125, |
| "rewards/rejected": -10.210372924804688, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.7932011331444759, |
| "grad_norm": 2.403813823548262e-07, |
| "learning_rate": 3.437162947853488e-05, |
| "logits/chosen": -52.67829132080078, |
| "logits/rejected": -57.03126907348633, |
| "logps/chosen": -3397.01953125, |
| "logps/rejected": -3500.949951171875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 13.576359748840332, |
| "rewards/margins": 23.891281127929688, |
| "rewards/rejected": -10.314921379089355, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.7991650514387952, |
| "grad_norm": 1.207563855132321e-06, |
| "learning_rate": 3.4147432697369366e-05, |
| "logits/chosen": -54.901878356933594, |
| "logits/rejected": -58.046234130859375, |
| "logps/chosen": -3778.25390625, |
| "logps/rejected": -4273.1123046875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 14.951375007629395, |
| "rewards/margins": 22.807498931884766, |
| "rewards/rejected": -7.8561224937438965, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.8051289697331147, |
| "grad_norm": 9.455924009671435e-05, |
| "learning_rate": 3.392238328642319e-05, |
| "logits/chosen": -56.3574333190918, |
| "logits/rejected": -60.68909454345703, |
| "logps/chosen": -3632.43603515625, |
| "logps/rejected": -3841.61767578125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 12.990877151489258, |
| "rewards/margins": 24.18165397644043, |
| "rewards/rejected": -11.190776824951172, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.8110928880274341, |
| "grad_norm": 1.4930514602440326e-09, |
| "learning_rate": 3.3696502222492384e-05, |
| "logits/chosen": -55.69526290893555, |
| "logits/rejected": -59.13079833984375, |
| "logps/chosen": -3696.494140625, |
| "logps/rejected": -3736.537109375, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 16.624422073364258, |
| "rewards/margins": 24.236011505126953, |
| "rewards/rejected": -7.611591339111328, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.8170568063217534, |
| "grad_norm": 2.062861739204891e-07, |
| "learning_rate": 3.346981055989114e-05, |
| "logits/chosen": -56.82810592651367, |
| "logits/rejected": -62.07573318481445, |
| "logps/chosen": -3906.99267578125, |
| "logps/rejected": -4055.106689453125, |
| "loss": 0.6131, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 12.317488670349121, |
| "rewards/margins": 21.558252334594727, |
| "rewards/rejected": -9.240762710571289, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.8230207246160728, |
| "grad_norm": 9.348526509711519e-05, |
| "learning_rate": 3.324232942848933e-05, |
| "logits/chosen": -54.5309944152832, |
| "logits/rejected": -54.803611755371094, |
| "logps/chosen": -3432.692626953125, |
| "logps/rejected": -3552.36328125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 11.066215515136719, |
| "rewards/margins": 22.905208587646484, |
| "rewards/rejected": -11.838993072509766, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.8289846429103921, |
| "grad_norm": 2.5315921448054723e-07, |
| "learning_rate": 3.3014080031743e-05, |
| "logits/chosen": -53.31890869140625, |
| "logits/rejected": -56.65520095825195, |
| "logps/chosen": -3301.90087890625, |
| "logps/rejected": -3586.98876953125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 12.38410472869873, |
| "rewards/margins": 27.104639053344727, |
| "rewards/rejected": -14.720535278320312, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.8349485612047115, |
| "grad_norm": 1.6616603204511193e-07, |
| "learning_rate": 3.278508364471801e-05, |
| "logits/chosen": -57.218421936035156, |
| "logits/rejected": -59.43006134033203, |
| "logps/chosen": -3883.231201171875, |
| "logps/rejected": -4120.4892578125, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.72683048248291, |
| "rewards/margins": 24.0744686126709, |
| "rewards/rejected": -13.347638130187988, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.8409124794990309, |
| "grad_norm": 7.670184487551523e-08, |
| "learning_rate": 3.255536161210699e-05, |
| "logits/chosen": -55.0363655090332, |
| "logits/rejected": -57.93296432495117, |
| "logps/chosen": -3802.41455078125, |
| "logps/rejected": -3902.47509765625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 12.028858184814453, |
| "rewards/margins": 22.72267723083496, |
| "rewards/rejected": -10.693819046020508, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.8468763977933502, |
| "grad_norm": 1.7279053565744107e-07, |
| "learning_rate": 3.2324935346239796e-05, |
| "logits/chosen": -54.202125549316406, |
| "logits/rejected": -57.15998458862305, |
| "logps/chosen": -3656.39990234375, |
| "logps/rejected": -3975.80859375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 11.18800163269043, |
| "rewards/margins": 24.70977783203125, |
| "rewards/rejected": -13.52177619934082, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.8528403160876696, |
| "grad_norm": 2.6494813454291943e-08, |
| "learning_rate": 3.209382632508768e-05, |
| "logits/chosen": -55.63017654418945, |
| "logits/rejected": -59.00508499145508, |
| "logps/chosen": -4403.7734375, |
| "logps/rejected": -4720.34228515625, |
| "loss": 0.0132, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.121981620788574, |
| "rewards/margins": 23.280654907226562, |
| "rewards/rejected": -13.158673286437988, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.8588042343819889, |
| "grad_norm": 2.5004118242577533e-07, |
| "learning_rate": 3.1862056090261336e-05, |
| "logits/chosen": -54.925018310546875, |
| "logits/rejected": -59.058372497558594, |
| "logps/chosen": -3932.13427734375, |
| "logps/rejected": -3746.4765625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 14.681973457336426, |
| "rewards/margins": 22.887840270996094, |
| "rewards/rejected": -8.205865859985352, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.8647681526763084, |
| "grad_norm": 2.1602265576348145e-07, |
| "learning_rate": 3.162964624500301e-05, |
| "logits/chosen": -54.48186492919922, |
| "logits/rejected": -56.77177047729492, |
| "logps/chosen": -3672.371826171875, |
| "logps/rejected": -3484.89697265625, |
| "loss": 0.0206, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 19.161680221557617, |
| "rewards/margins": 24.562843322753906, |
| "rewards/rejected": -5.401161193847656, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.8707320709706277, |
| "grad_norm": 3.4755198612401728e-06, |
| "learning_rate": 3.139661845217287e-05, |
| "logits/chosen": -51.368988037109375, |
| "logits/rejected": -55.37031936645508, |
| "logps/chosen": -3605.017578125, |
| "logps/rejected": -3807.35595703125, |
| "loss": 0.0054, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 17.435853958129883, |
| "rewards/margins": 23.906322479248047, |
| "rewards/rejected": -6.470468044281006, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.8766959892649471, |
| "grad_norm": 3.5051095892413286e-06, |
| "learning_rate": 3.11629944322298e-05, |
| "logits/chosen": -58.4751091003418, |
| "logits/rejected": -62.437644958496094, |
| "logps/chosen": -3605.69140625, |
| "logps/rejected": -4044.111328125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 14.948145866394043, |
| "rewards/margins": 26.044723510742188, |
| "rewards/rejected": -11.096578598022461, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.8826599075592665, |
| "grad_norm": 5.029290406355358e-08, |
| "learning_rate": 3.092879596120689e-05, |
| "logits/chosen": -59.90129852294922, |
| "logits/rejected": -63.65130615234375, |
| "logps/chosen": -4040.684326171875, |
| "logps/rejected": -3993.44677734375, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.216289520263672, |
| "rewards/margins": 23.25588607788086, |
| "rewards/rejected": -13.039596557617188, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.8886238258535858, |
| "grad_norm": 1.237174153327942, |
| "learning_rate": 3.06940448686816e-05, |
| "logits/chosen": -59.22917556762695, |
| "logits/rejected": -61.15869140625, |
| "logps/chosen": -3975.68408203125, |
| "logps/rejected": -4147.9228515625, |
| "loss": 0.0006, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 13.068977355957031, |
| "rewards/margins": 24.409832000732422, |
| "rewards/rejected": -11.340853691101074, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.8945877441479052, |
| "grad_norm": 9.023612801684067e-05, |
| "learning_rate": 3.045876303574116e-05, |
| "logits/chosen": -59.75891876220703, |
| "logits/rejected": -62.84197998046875, |
| "logps/chosen": -3887.1640625, |
| "logps/rejected": -3652.65869140625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 11.598661422729492, |
| "rewards/margins": 22.873065948486328, |
| "rewards/rejected": -11.274404525756836, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.9005516624422245, |
| "grad_norm": 4.560320121527184e-08, |
| "learning_rate": 3.0222972392942943e-05, |
| "logits/chosen": -58.6246337890625, |
| "logits/rejected": -61.87261962890625, |
| "logps/chosen": -3715.770263671875, |
| "logps/rejected": -4073.483154296875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.044273376464844, |
| "rewards/margins": 26.504796981811523, |
| "rewards/rejected": -16.460525512695312, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.9065155807365439, |
| "grad_norm": 1.8267148504946817e-07, |
| "learning_rate": 2.998669491827035e-05, |
| "logits/chosen": -54.9869499206543, |
| "logits/rejected": -58.3288459777832, |
| "logps/chosen": -3689.07373046875, |
| "logps/rejected": -3788.818359375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.276895523071289, |
| "rewards/margins": 23.46544075012207, |
| "rewards/rejected": -14.188543319702148, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.9124794990308632, |
| "grad_norm": 0.07166226208209991, |
| "learning_rate": 2.9749952635084254e-05, |
| "logits/chosen": -58.55971145629883, |
| "logits/rejected": -61.770591735839844, |
| "logps/chosen": -3907.703125, |
| "logps/rejected": -3977.048095703125, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 8.65731430053711, |
| "rewards/margins": 23.61507225036621, |
| "rewards/rejected": -14.957756042480469, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.9184434173251826, |
| "grad_norm": 5.0479648052714765e-06, |
| "learning_rate": 2.9512767610070235e-05, |
| "logits/chosen": -58.743080139160156, |
| "logits/rejected": -62.081268310546875, |
| "logps/chosen": -4091.01171875, |
| "logps/rejected": -4007.314453125, |
| "loss": 0.0127, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 6.914778232574463, |
| "rewards/margins": 22.504919052124023, |
| "rewards/rejected": -15.590141296386719, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.9244073356195021, |
| "grad_norm": 5.116536527793869e-09, |
| "learning_rate": 2.927516195118167e-05, |
| "logits/chosen": -58.5189323425293, |
| "logits/rejected": -60.991737365722656, |
| "logps/chosen": -3521.76025390625, |
| "logps/rejected": -3490.95068359375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 5.615170478820801, |
| "rewards/margins": 23.283008575439453, |
| "rewards/rejected": -17.667837142944336, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.9303712539138214, |
| "grad_norm": 3.0113267257547705e-06, |
| "learning_rate": 2.903715780557915e-05, |
| "logits/chosen": -58.911537170410156, |
| "logits/rejected": -61.680030822753906, |
| "logps/chosen": -3976.016357421875, |
| "logps/rejected": -3833.219482421875, |
| "loss": 0.0708, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 5.67651891708374, |
| "rewards/margins": 25.14394187927246, |
| "rewards/rejected": -19.467424392700195, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.9363351722081408, |
| "grad_norm": 7.281344949205959e-08, |
| "learning_rate": 2.8798777357566102e-05, |
| "logits/chosen": -62.60260009765625, |
| "logits/rejected": -64.15509796142578, |
| "logps/chosen": -4138.4677734375, |
| "logps/rejected": -3727.09375, |
| "loss": 0.0231, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 6.600105285644531, |
| "rewards/margins": 23.73483657836914, |
| "rewards/rejected": -17.13473129272461, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.9422990905024601, |
| "grad_norm": 1.6167986416348867e-07, |
| "learning_rate": 2.8560042826520983e-05, |
| "logits/chosen": -55.1787223815918, |
| "logits/rejected": -57.27393341064453, |
| "logps/chosen": -3631.578125, |
| "logps/rejected": -3276.724609375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 15.137773513793945, |
| "rewards/margins": 24.159759521484375, |
| "rewards/rejected": -9.021984100341797, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.9482630087967795, |
| "grad_norm": 0.0022099693305790424, |
| "learning_rate": 2.8320976464826233e-05, |
| "logits/chosen": -56.308815002441406, |
| "logits/rejected": -58.647621154785156, |
| "logps/chosen": -3679.826171875, |
| "logps/rejected": -3692.447998046875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 17.499637603759766, |
| "rewards/margins": 25.266708374023438, |
| "rewards/rejected": -7.767067909240723, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.9542269270910988, |
| "grad_norm": 14.162381172180176, |
| "learning_rate": 2.808160055579418e-05, |
| "logits/chosen": -58.639259338378906, |
| "logits/rejected": -61.093353271484375, |
| "logps/chosen": -4223.41552734375, |
| "logps/rejected": -4175.84814453125, |
| "loss": 0.0454, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 21.25933265686035, |
| "rewards/margins": 25.541118621826172, |
| "rewards/rejected": -4.281786918640137, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.9601908453854182, |
| "grad_norm": 5.322955871633894e-07, |
| "learning_rate": 2.784193741158993e-05, |
| "logits/chosen": -54.426841735839844, |
| "logits/rejected": -58.14940643310547, |
| "logps/chosen": -3445.11474609375, |
| "logps/rejected": -3718.72119140625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 18.399213790893555, |
| "rewards/margins": 24.536479949951172, |
| "rewards/rejected": -6.137265682220459, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.9661547636797376, |
| "grad_norm": 5.999038876325358e-06, |
| "learning_rate": 2.7602009371151717e-05, |
| "logits/chosen": -57.2712287902832, |
| "logits/rejected": -61.121788024902344, |
| "logps/chosen": -4308.03466796875, |
| "logps/rejected": -4611.1044921875, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 22.73969078063965, |
| "rewards/margins": 24.401287078857422, |
| "rewards/rejected": -1.6615936756134033, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.9721186819740569, |
| "grad_norm": 0.005200933199375868, |
| "learning_rate": 2.7361838798108714e-05, |
| "logits/chosen": -56.9376220703125, |
| "logits/rejected": -59.23795700073242, |
| "logps/chosen": -3882.723876953125, |
| "logps/rejected": -4050.682373046875, |
| "loss": 0.0016, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 20.68083953857422, |
| "rewards/margins": 24.391653060913086, |
| "rewards/rejected": -3.710813045501709, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.9780826002683763, |
| "grad_norm": 2.6656310936346017e-08, |
| "learning_rate": 2.7121448078696437e-05, |
| "logits/chosen": -53.07708740234375, |
| "logits/rejected": -57.34746170043945, |
| "logps/chosen": -3162.02685546875, |
| "logps/rejected": -3019.009033203125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 20.784738540649414, |
| "rewards/margins": 26.76715087890625, |
| "rewards/rejected": -5.982410907745361, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.9840465185626956, |
| "grad_norm": 1.415725847664362e-07, |
| "learning_rate": 2.6880859619670236e-05, |
| "logits/chosen": -54.78126907348633, |
| "logits/rejected": -57.343162536621094, |
| "logps/chosen": -3768.965576171875, |
| "logps/rejected": -3766.91259765625, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 20.079748153686523, |
| "rewards/margins": 26.081069946289062, |
| "rewards/rejected": -6.0013227462768555, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.9900104368570151, |
| "grad_norm": 1.0123216043211869e-08, |
| "learning_rate": 2.66400958462167e-05, |
| "logits/chosen": -55.69786834716797, |
| "logits/rejected": -60.4398078918457, |
| "logps/chosen": -3736.546142578125, |
| "logps/rejected": -4183.6357421875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 16.356075286865234, |
| "rewards/margins": 28.006738662719727, |
| "rewards/rejected": -11.650662422180176, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.9959743551513345, |
| "grad_norm": 2.6384802254142414e-07, |
| "learning_rate": 2.6399179199863423e-05, |
| "logits/chosen": -55.284812927246094, |
| "logits/rejected": -59.626625061035156, |
| "logps/chosen": -3676.928466796875, |
| "logps/rejected": -3803.169921875, |
| "loss": 0.0558, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 14.217809677124023, |
| "rewards/margins": 24.813610076904297, |
| "rewards/rejected": -10.595799446105957, |
| "step": 1670 |
| }, |
| { |
| "epoch": 1.0017891754882957, |
| "grad_norm": 7.79418769525364e-06, |
| "learning_rate": 2.6158132136387247e-05, |
| "logits/chosen": -57.85382843017578, |
| "logits/rejected": -59.920597076416016, |
| "logps/chosen": -3755.565673828125, |
| "logps/rejected": -4092.383056640625, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 14.796311378479004, |
| "rewards/margins": 25.673641204833984, |
| "rewards/rejected": -10.87733268737793, |
| "step": 1680 |
| }, |
| { |
| "epoch": 1.0077530937826151, |
| "grad_norm": 3.236905854464567e-08, |
| "learning_rate": 2.5916977123721166e-05, |
| "logits/chosen": -57.10400390625, |
| "logits/rejected": -59.06767654418945, |
| "logps/chosen": -3694.768798828125, |
| "logps/rejected": -4206.1513671875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 11.047399520874023, |
| "rewards/margins": 24.886272430419922, |
| "rewards/rejected": -13.838871955871582, |
| "step": 1690 |
| }, |
| { |
| "epoch": 1.0137170120769345, |
| "grad_norm": 9.515994747744116e-08, |
| "learning_rate": 2.5675736639860077e-05, |
| "logits/chosen": -55.722320556640625, |
| "logits/rejected": -59.74749755859375, |
| "logps/chosen": -3329.92041015625, |
| "logps/rejected": -3575.55224609375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 15.99412727355957, |
| "rewards/margins": 26.575632095336914, |
| "rewards/rejected": -10.581506729125977, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.019680930371254, |
| "grad_norm": 2.12275141908691e-10, |
| "learning_rate": 2.5434433170765635e-05, |
| "logits/chosen": -55.73252487182617, |
| "logits/rejected": -59.35845947265625, |
| "logps/chosen": -3671.85693359375, |
| "logps/rejected": -4161.1396484375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 15.043779373168945, |
| "rewards/margins": 28.186742782592773, |
| "rewards/rejected": -13.142965316772461, |
| "step": 1710 |
| }, |
| { |
| "epoch": 1.0256448486655734, |
| "grad_norm": 1.0201654276897898e-06, |
| "learning_rate": 2.5193089208270332e-05, |
| "logits/chosen": -55.662010192871094, |
| "logits/rejected": -60.647422790527344, |
| "logps/chosen": -3821.21240234375, |
| "logps/rejected": -3968.89306640625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 16.09891128540039, |
| "rewards/margins": 27.115798950195312, |
| "rewards/rejected": -11.016887664794922, |
| "step": 1720 |
| }, |
| { |
| "epoch": 1.0316087669598926, |
| "grad_norm": 4.6147793919537605e-10, |
| "learning_rate": 2.4951727247981026e-05, |
| "logits/chosen": -57.985267639160156, |
| "logits/rejected": -60.434349060058594, |
| "logps/chosen": -3654.449951171875, |
| "logps/rejected": -3963.91162109375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 17.611637115478516, |
| "rewards/margins": 26.88236427307129, |
| "rewards/rejected": -9.270727157592773, |
| "step": 1730 |
| }, |
| { |
| "epoch": 1.037572685254212, |
| "grad_norm": 6.404677696991712e-05, |
| "learning_rate": 2.4710369787182163e-05, |
| "logits/chosen": -57.09203338623047, |
| "logits/rejected": -61.46424102783203, |
| "logps/chosen": -3566.01416015625, |
| "logps/rejected": -3992.688720703125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 15.677534103393555, |
| "rewards/margins": 24.495372772216797, |
| "rewards/rejected": -8.81783676147461, |
| "step": 1740 |
| }, |
| { |
| "epoch": 1.0435366035485314, |
| "grad_norm": 3.6316012597126246e-07, |
| "learning_rate": 2.4469039322738786e-05, |
| "logits/chosen": -56.2172737121582, |
| "logits/rejected": -61.31587600708008, |
| "logps/chosen": -3745.28369140625, |
| "logps/rejected": -3803.864501953125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 16.529300689697266, |
| "rewards/margins": 27.05365562438965, |
| "rewards/rejected": -10.524356842041016, |
| "step": 1750 |
| }, |
| { |
| "epoch": 1.0495005218428508, |
| "grad_norm": 3.816726064087561e-07, |
| "learning_rate": 2.42277583489996e-05, |
| "logits/chosen": -59.777565002441406, |
| "logits/rejected": -63.48384475708008, |
| "logps/chosen": -3635.927734375, |
| "logps/rejected": -4089.21630859375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 14.0086088180542, |
| "rewards/margins": 25.672292709350586, |
| "rewards/rejected": -11.663687705993652, |
| "step": 1760 |
| }, |
| { |
| "epoch": 1.0554644401371702, |
| "grad_norm": 2.641392484292737e-06, |
| "learning_rate": 2.3986549355700308e-05, |
| "logits/chosen": -59.445106506347656, |
| "logits/rejected": -62.23298263549805, |
| "logps/chosen": -3913.08447265625, |
| "logps/rejected": -4078.412109375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 16.499652862548828, |
| "rewards/margins": 24.941543579101562, |
| "rewards/rejected": -8.441890716552734, |
| "step": 1770 |
| }, |
| { |
| "epoch": 1.0614283584314894, |
| "grad_norm": 3.112697655183183e-08, |
| "learning_rate": 2.3745434825867347e-05, |
| "logits/chosen": -55.928680419921875, |
| "logits/rejected": -60.84385299682617, |
| "logps/chosen": -3434.234375, |
| "logps/rejected": -3640.389892578125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 15.650065422058105, |
| "rewards/margins": 28.1390323638916, |
| "rewards/rejected": -12.488967895507812, |
| "step": 1780 |
| }, |
| { |
| "epoch": 1.0673922767258088, |
| "grad_norm": 2.6007089672930306e-06, |
| "learning_rate": 2.3504437233722214e-05, |
| "logits/chosen": -56.6685905456543, |
| "logits/rejected": -62.179771423339844, |
| "logps/chosen": -3519.731201171875, |
| "logps/rejected": -3699.080810546875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 15.84507942199707, |
| "rewards/margins": 26.682373046875, |
| "rewards/rejected": -10.83729362487793, |
| "step": 1790 |
| }, |
| { |
| "epoch": 1.0733561950201282, |
| "grad_norm": 1.2538286447525024, |
| "learning_rate": 2.3263579042586697e-05, |
| "logits/chosen": -54.93901824951172, |
| "logits/rejected": -57.605445861816406, |
| "logps/chosen": -3739.166748046875, |
| "logps/rejected": -3709.137451171875, |
| "loss": 0.0009, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 15.472308158874512, |
| "rewards/margins": 25.952701568603516, |
| "rewards/rejected": -10.480390548706055, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.0793201133144477, |
| "grad_norm": 1.3886967964449326e-11, |
| "learning_rate": 2.302288270278904e-05, |
| "logits/chosen": -58.45922088623047, |
| "logits/rejected": -60.769447326660156, |
| "logps/chosen": -4227.611328125, |
| "logps/rejected": -4209.0576171875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 15.69550609588623, |
| "rewards/margins": 27.71603012084961, |
| "rewards/rejected": -12.020523071289062, |
| "step": 1810 |
| }, |
| { |
| "epoch": 1.0852840316087669, |
| "grad_norm": 4.680672418544418e-07, |
| "learning_rate": 2.2782370649571368e-05, |
| "logits/chosen": -57.41197967529297, |
| "logits/rejected": -61.311851501464844, |
| "logps/chosen": -3252.1953125, |
| "logps/rejected": -3305.15966796875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 12.210145950317383, |
| "rewards/margins": 26.91498374938965, |
| "rewards/rejected": -14.704841613769531, |
| "step": 1820 |
| }, |
| { |
| "epoch": 1.0912479499030863, |
| "grad_norm": 1.5242182598740328e-05, |
| "learning_rate": 2.25420653009985e-05, |
| "logits/chosen": -60.623023986816406, |
| "logits/rejected": -65.05506134033203, |
| "logps/chosen": -3815.992919921875, |
| "logps/rejected": -3862.602783203125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 11.181077003479004, |
| "rewards/margins": 28.176227569580078, |
| "rewards/rejected": -16.995149612426758, |
| "step": 1830 |
| }, |
| { |
| "epoch": 1.0972118681974057, |
| "grad_norm": 0.0001548586442368105, |
| "learning_rate": 2.2301989055868383e-05, |
| "logits/chosen": -59.02531051635742, |
| "logits/rejected": -63.0557975769043, |
| "logps/chosen": -3652.93701171875, |
| "logps/rejected": -3930.30419921875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 8.64163875579834, |
| "rewards/margins": 25.959850311279297, |
| "rewards/rejected": -17.318214416503906, |
| "step": 1840 |
| }, |
| { |
| "epoch": 1.103175786491725, |
| "grad_norm": 2.7569740268518217e-05, |
| "learning_rate": 2.2062164291624284e-05, |
| "logits/chosen": -59.61224365234375, |
| "logits/rejected": -62.123130798339844, |
| "logps/chosen": -4048.87939453125, |
| "logps/rejected": -4120.58837890625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 11.486291885375977, |
| "rewards/margins": 29.3957462310791, |
| "rewards/rejected": -17.909452438354492, |
| "step": 1850 |
| }, |
| { |
| "epoch": 1.1091397047860445, |
| "grad_norm": 5.057771423899737e-10, |
| "learning_rate": 2.1822613362269e-05, |
| "logits/chosen": -57.71868896484375, |
| "logits/rejected": -60.3415412902832, |
| "logps/chosen": -4136.40966796875, |
| "logps/rejected": -4483.23583984375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.933389663696289, |
| "rewards/margins": 29.532873153686523, |
| "rewards/rejected": -18.599483489990234, |
| "step": 1860 |
| }, |
| { |
| "epoch": 1.1151036230803637, |
| "grad_norm": 3.651170255025704e-09, |
| "learning_rate": 2.158335859628126e-05, |
| "logits/chosen": -58.62504959106445, |
| "logits/rejected": -62.4349479675293, |
| "logps/chosen": -4120.52001953125, |
| "logps/rejected": -4143.6689453125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.698260307312012, |
| "rewards/margins": 27.590999603271484, |
| "rewards/rejected": -17.892738342285156, |
| "step": 1870 |
| }, |
| { |
| "epoch": 1.1210675413746831, |
| "grad_norm": 1.4945511495056962e-08, |
| "learning_rate": 2.1344422294534466e-05, |
| "logits/chosen": -54.32935333251953, |
| "logits/rejected": -58.437278747558594, |
| "logps/chosen": -3511.121826171875, |
| "logps/rejected": -3695.01416015625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 11.38096809387207, |
| "rewards/margins": 27.27777099609375, |
| "rewards/rejected": -15.896801948547363, |
| "step": 1880 |
| }, |
| { |
| "epoch": 1.1270314596690025, |
| "grad_norm": 3.889097206410952e-05, |
| "learning_rate": 2.1105826728218072e-05, |
| "logits/chosen": -57.310203552246094, |
| "logits/rejected": -61.566688537597656, |
| "logps/chosen": -3701.465576171875, |
| "logps/rejected": -4115.97802734375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 8.651991844177246, |
| "rewards/margins": 25.42984962463379, |
| "rewards/rejected": -16.77785873413086, |
| "step": 1890 |
| }, |
| { |
| "epoch": 1.132995377963322, |
| "grad_norm": 3.0905356052102206e-09, |
| "learning_rate": 2.086759413676166e-05, |
| "logits/chosen": -59.252281188964844, |
| "logits/rejected": -61.68608474731445, |
| "logps/chosen": -4090.481201171875, |
| "logps/rejected": -4027.00390625, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 8.865057945251465, |
| "rewards/margins": 25.79592514038086, |
| "rewards/rejected": -16.930866241455078, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.1389592962576414, |
| "grad_norm": 9.62276430982456e-07, |
| "learning_rate": 2.062974672576203e-05, |
| "logits/chosen": -55.8050537109375, |
| "logits/rejected": -59.1325798034668, |
| "logps/chosen": -3845.809814453125, |
| "logps/rejected": -3548.93994140625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 11.28779411315918, |
| "rewards/margins": 27.17965316772461, |
| "rewards/rejected": -15.891860961914062, |
| "step": 1910 |
| }, |
| { |
| "epoch": 1.1449232145519606, |
| "grad_norm": 5.546740311501708e-08, |
| "learning_rate": 2.0392306664913414e-05, |
| "logits/chosen": -59.28105926513672, |
| "logits/rejected": -62.87616729736328, |
| "logps/chosen": -3794.875732421875, |
| "logps/rejected": -4073.58740234375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.75609016418457, |
| "rewards/margins": 28.09228515625, |
| "rewards/rejected": -17.336193084716797, |
| "step": 1920 |
| }, |
| { |
| "epoch": 1.15088713284628, |
| "grad_norm": 7.052684480868265e-08, |
| "learning_rate": 2.015529608594104e-05, |
| "logits/chosen": -60.80534744262695, |
| "logits/rejected": -63.97881317138672, |
| "logps/chosen": -4513.54541015625, |
| "logps/rejected": -4882.16796875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 9.083196640014648, |
| "rewards/margins": 27.9643611907959, |
| "rewards/rejected": -18.88116455078125, |
| "step": 1930 |
| }, |
| { |
| "epoch": 1.1568510511405994, |
| "grad_norm": 0.0006892980891279876, |
| "learning_rate": 1.991873708053823e-05, |
| "logits/chosen": -59.05274200439453, |
| "logits/rejected": -59.57912063598633, |
| "logps/chosen": -4209.0419921875, |
| "logps/rejected": -4109.14306640625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 8.459901809692383, |
| "rewards/margins": 23.901077270507812, |
| "rewards/rejected": -15.44117546081543, |
| "step": 1940 |
| }, |
| { |
| "epoch": 1.1628149694349188, |
| "grad_norm": 0.014071582816541195, |
| "learning_rate": 1.968265169830728e-05, |
| "logits/chosen": -58.680267333984375, |
| "logits/rejected": -62.526641845703125, |
| "logps/chosen": -4415.60400390625, |
| "logps/rejected": -4929.56689453125, |
| "loss": 0.0004, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 11.520208358764648, |
| "rewards/margins": 27.615558624267578, |
| "rewards/rejected": -16.09535026550293, |
| "step": 1950 |
| }, |
| { |
| "epoch": 1.1687788877292382, |
| "grad_norm": 1.5595247315758343e-09, |
| "learning_rate": 1.9447061944704173e-05, |
| "logits/chosen": -57.07389450073242, |
| "logits/rejected": -61.6367301940918, |
| "logps/chosen": -3919.495361328125, |
| "logps/rejected": -3924.32177734375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 16.67729949951172, |
| "rewards/margins": 28.186996459960938, |
| "rewards/rejected": -11.509698867797852, |
| "step": 1960 |
| }, |
| { |
| "epoch": 1.1747428060235574, |
| "grad_norm": 2.3406285265537008e-07, |
| "learning_rate": 1.9211989778987502e-05, |
| "logits/chosen": -55.24726486206055, |
| "logits/rejected": -58.94435501098633, |
| "logps/chosen": -3895.52197265625, |
| "logps/rejected": -3826.360107421875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 17.88887596130371, |
| "rewards/margins": 26.167491912841797, |
| "rewards/rejected": -8.278615951538086, |
| "step": 1970 |
| }, |
| { |
| "epoch": 1.1807067243178768, |
| "grad_norm": 2.7980338046518227e-09, |
| "learning_rate": 1.897745711217161e-05, |
| "logits/chosen": -57.610687255859375, |
| "logits/rejected": -61.2164192199707, |
| "logps/chosen": -3854.04736328125, |
| "logps/rejected": -4032.45703125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 17.306734085083008, |
| "rewards/margins": 25.79241943359375, |
| "rewards/rejected": -8.485687255859375, |
| "step": 1980 |
| }, |
| { |
| "epoch": 1.1866706426121962, |
| "grad_norm": 0.0017272484255954623, |
| "learning_rate": 1.8743485804984294e-05, |
| "logits/chosen": -56.56272506713867, |
| "logits/rejected": -60.19001388549805, |
| "logps/chosen": -3713.989501953125, |
| "logps/rejected": -3903.66943359375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 17.19795036315918, |
| "rewards/margins": 26.44158935546875, |
| "rewards/rejected": -9.243639945983887, |
| "step": 1990 |
| }, |
| { |
| "epoch": 1.1926345609065157, |
| "grad_norm": 4.515972318319683e-10, |
| "learning_rate": 1.8510097665829177e-05, |
| "logits/chosen": -54.333839416503906, |
| "logits/rejected": -58.24871063232422, |
| "logps/chosen": -3881.78271484375, |
| "logps/rejected": -3991.203125, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 17.033931732177734, |
| "rewards/margins": 26.011281967163086, |
| "rewards/rejected": -8.977351188659668, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.198598479200835, |
| "grad_norm": 1.3980934454593807e-07, |
| "learning_rate": 1.827731444875293e-05, |
| "logits/chosen": -56.24668502807617, |
| "logits/rejected": -59.58536911010742, |
| "logps/chosen": -3488.839111328125, |
| "logps/rejected": -3365.36474609375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 17.400005340576172, |
| "rewards/margins": 26.981313705444336, |
| "rewards/rejected": -9.581306457519531, |
| "step": 2010 |
| }, |
| { |
| "epoch": 1.2045623974951543, |
| "grad_norm": 5.057323448909301e-09, |
| "learning_rate": 1.804515785141761e-05, |
| "logits/chosen": -53.02077102661133, |
| "logits/rejected": -55.9692497253418, |
| "logps/chosen": -3452.415283203125, |
| "logps/rejected": -3728.23046875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 15.993364334106445, |
| "rewards/margins": 28.094369888305664, |
| "rewards/rejected": -12.101005554199219, |
| "step": 2020 |
| }, |
| { |
| "epoch": 1.2105263157894737, |
| "grad_norm": 2.0057086658198386e-07, |
| "learning_rate": 1.7813649513078206e-05, |
| "logits/chosen": -56.12092208862305, |
| "logits/rejected": -64.37012481689453, |
| "logps/chosen": -3950.99658203125, |
| "logps/rejected": -4666.666015625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 16.969812393188477, |
| "rewards/margins": 27.363088607788086, |
| "rewards/rejected": -10.393278121948242, |
| "step": 2030 |
| }, |
| { |
| "epoch": 1.216490234083793, |
| "grad_norm": 6.621379355919998e-08, |
| "learning_rate": 1.758281101256567e-05, |
| "logits/chosen": -56.147132873535156, |
| "logits/rejected": -59.15153121948242, |
| "logps/chosen": -3801.32470703125, |
| "logps/rejected": -3779.278564453125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 16.260011672973633, |
| "rewards/margins": 27.139633178710938, |
| "rewards/rejected": -10.879620552062988, |
| "step": 2040 |
| }, |
| { |
| "epoch": 1.2224541523781125, |
| "grad_norm": 2.4662213036208414e-06, |
| "learning_rate": 1.735266386627554e-05, |
| "logits/chosen": -53.46790313720703, |
| "logits/rejected": -58.09453201293945, |
| "logps/chosen": -3495.250732421875, |
| "logps/rejected": -3734.54541015625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 15.44164752960205, |
| "rewards/margins": 24.200706481933594, |
| "rewards/rejected": -8.759058952331543, |
| "step": 2050 |
| }, |
| { |
| "epoch": 1.2284180706724317, |
| "grad_norm": 3.332436904202041e-07, |
| "learning_rate": 1.7123229526162394e-05, |
| "logits/chosen": -59.822052001953125, |
| "logits/rejected": -64.50569915771484, |
| "logps/chosen": -3889.31103515625, |
| "logps/rejected": -4200.05615234375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 16.452617645263672, |
| "rewards/margins": 26.027156829833984, |
| "rewards/rejected": -9.574542999267578, |
| "step": 2060 |
| }, |
| { |
| "epoch": 1.234381988966751, |
| "grad_norm": 4.8211667547093384e-08, |
| "learning_rate": 1.6894529377740355e-05, |
| "logits/chosen": -55.5076789855957, |
| "logits/rejected": -59.86821365356445, |
| "logps/chosen": -3581.645751953125, |
| "logps/rejected": -3897.926513671875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 16.29252052307129, |
| "rewards/margins": 26.74545669555664, |
| "rewards/rejected": -10.452935218811035, |
| "step": 2070 |
| }, |
| { |
| "epoch": 1.2403459072610705, |
| "grad_norm": 3.915023327749623e-08, |
| "learning_rate": 1.6666584738089735e-05, |
| "logits/chosen": -55.1793327331543, |
| "logits/rejected": -57.694480895996094, |
| "logps/chosen": -3753.420654296875, |
| "logps/rejected": -3864.955078125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 16.927478790283203, |
| "rewards/margins": 26.437557220458984, |
| "rewards/rejected": -9.51008129119873, |
| "step": 2080 |
| }, |
| { |
| "epoch": 1.24630982555539, |
| "grad_norm": 6.761503641428135e-07, |
| "learning_rate": 1.6439416853870042e-05, |
| "logits/chosen": -56.9517936706543, |
| "logits/rejected": -61.928070068359375, |
| "logps/chosen": -3902.32421875, |
| "logps/rejected": -4099.13623046875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 16.528411865234375, |
| "rewards/margins": 28.16168212890625, |
| "rewards/rejected": -11.633268356323242, |
| "step": 2090 |
| }, |
| { |
| "epoch": 1.2522737438497091, |
| "grad_norm": 2.90481926706887e-11, |
| "learning_rate": 1.621304689933967e-05, |
| "logits/chosen": -56.108123779296875, |
| "logits/rejected": -61.044708251953125, |
| "logps/chosen": -3557.475830078125, |
| "logps/rejected": -3533.578857421875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 13.501747131347656, |
| "rewards/margins": 24.694103240966797, |
| "rewards/rejected": -11.19235610961914, |
| "step": 2100 |
| }, |
| { |
| "epoch": 1.2582376621440285, |
| "grad_norm": 2.608700588879742e-09, |
| "learning_rate": 1.5987495974382154e-05, |
| "logits/chosen": -56.22504806518555, |
| "logits/rejected": -59.918357849121094, |
| "logps/chosen": -3823.78515625, |
| "logps/rejected": -3600.735107421875, |
| "loss": 0.0216, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 15.420328140258789, |
| "rewards/margins": 26.53145980834961, |
| "rewards/rejected": -11.11113166809082, |
| "step": 2110 |
| }, |
| { |
| "epoch": 1.264201580438348, |
| "grad_norm": 3.650003321808981e-08, |
| "learning_rate": 1.5762785102539508e-05, |
| "logits/chosen": -56.17362594604492, |
| "logits/rejected": -59.19562530517578, |
| "logps/chosen": -3761.76708984375, |
| "logps/rejected": -3929.08740234375, |
| "loss": 4.0644, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 15.054628372192383, |
| "rewards/margins": 21.474042892456055, |
| "rewards/rejected": -6.419415473937988, |
| "step": 2120 |
| }, |
| { |
| "epoch": 1.2701654987326674, |
| "grad_norm": 9.014668123086267e-09, |
| "learning_rate": 1.5538935229052624e-05, |
| "logits/chosen": -55.22417068481445, |
| "logits/rejected": -62.16356658935547, |
| "logps/chosen": -3842.323486328125, |
| "logps/rejected": -4168.7177734375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 15.2366361618042, |
| "rewards/margins": 33.672088623046875, |
| "rewards/rejected": -18.435455322265625, |
| "step": 2130 |
| }, |
| { |
| "epoch": 1.2761294170269868, |
| "grad_norm": 1.4135798309666825e-08, |
| "learning_rate": 1.531596721890897e-05, |
| "logits/chosen": -56.111854553222656, |
| "logits/rejected": -60.602996826171875, |
| "logps/chosen": -3569.96875, |
| "logps/rejected": -3690.09521484375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 11.550596237182617, |
| "rewards/margins": 27.969738006591797, |
| "rewards/rejected": -16.419147491455078, |
| "step": 2140 |
| }, |
| { |
| "epoch": 1.282093335321306, |
| "grad_norm": 2.9603810617118143e-05, |
| "learning_rate": 1.5093901854897745e-05, |
| "logits/chosen": -58.67523193359375, |
| "logits/rejected": -60.438629150390625, |
| "logps/chosen": -4015.98095703125, |
| "logps/rejected": -4065.284423828125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 13.875455856323242, |
| "rewards/margins": 27.658016204833984, |
| "rewards/rejected": -13.782560348510742, |
| "step": 2150 |
| }, |
| { |
| "epoch": 1.2880572536156254, |
| "grad_norm": 5.880232967214738e-10, |
| "learning_rate": 1.4872759835672755e-05, |
| "logits/chosen": -54.29423904418945, |
| "logits/rejected": -58.524017333984375, |
| "logps/chosen": -3648.724609375, |
| "logps/rejected": -3542.318359375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 12.874812126159668, |
| "rewards/margins": 27.214290618896484, |
| "rewards/rejected": -14.3394775390625, |
| "step": 2160 |
| }, |
| { |
| "epoch": 1.2940211719099448, |
| "grad_norm": 6.31591660749109e-07, |
| "learning_rate": 1.4652561773823103e-05, |
| "logits/chosen": -56.8558464050293, |
| "logits/rejected": -62.6048698425293, |
| "logps/chosen": -3897.655517578125, |
| "logps/rejected": -4199.82958984375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.988385200500488, |
| "rewards/margins": 29.572118759155273, |
| "rewards/rejected": -18.5837345123291, |
| "step": 2170 |
| }, |
| { |
| "epoch": 1.2999850902042642, |
| "grad_norm": 6.179973297548713e-07, |
| "learning_rate": 1.4433328193951837e-05, |
| "logits/chosen": -57.0740966796875, |
| "logits/rejected": -59.751129150390625, |
| "logps/chosen": -3662.604736328125, |
| "logps/rejected": -3552.6328125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.184779167175293, |
| "rewards/margins": 25.6380558013916, |
| "rewards/rejected": -15.453274726867676, |
| "step": 2180 |
| }, |
| { |
| "epoch": 1.3059490084985836, |
| "grad_norm": 3.950074471958942e-07, |
| "learning_rate": 1.421507953076291e-05, |
| "logits/chosen": -59.95570755004883, |
| "logits/rejected": -64.49372863769531, |
| "logps/chosen": -3971.001953125, |
| "logps/rejected": -4431.60009765625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 11.856526374816895, |
| "rewards/margins": 29.100147247314453, |
| "rewards/rejected": -17.243621826171875, |
| "step": 2190 |
| }, |
| { |
| "epoch": 1.3119129267929028, |
| "grad_norm": 1.1393160720407636e-10, |
| "learning_rate": 1.3997836127156457e-05, |
| "logits/chosen": -57.049774169921875, |
| "logits/rejected": -61.720191955566406, |
| "logps/chosen": -3714.94189453125, |
| "logps/rejected": -3634.033935546875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 12.39828872680664, |
| "rewards/margins": 26.935588836669922, |
| "rewards/rejected": -14.537300109863281, |
| "step": 2200 |
| }, |
| { |
| "epoch": 1.3178768450872222, |
| "grad_norm": 5.23483825731752e-11, |
| "learning_rate": 1.3781618232332633e-05, |
| "logits/chosen": -58.066749572753906, |
| "logits/rejected": -61.83543014526367, |
| "logps/chosen": -3958.8125, |
| "logps/rejected": -4085.514892578125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 11.226404190063477, |
| "rewards/margins": 26.033559799194336, |
| "rewards/rejected": -14.807156562805176, |
| "step": 2210 |
| }, |
| { |
| "epoch": 1.3238407633815417, |
| "grad_norm": 7.795271272925675e-08, |
| "learning_rate": 1.3566445999904174e-05, |
| "logits/chosen": -55.3619270324707, |
| "logits/rejected": -61.64369583129883, |
| "logps/chosen": -3446.460205078125, |
| "logps/rejected": -3815.262939453125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 14.02111530303955, |
| "rewards/margins": 31.109447479248047, |
| "rewards/rejected": -17.088333129882812, |
| "step": 2220 |
| }, |
| { |
| "epoch": 1.329804681675861, |
| "grad_norm": 5.124166690961829e-08, |
| "learning_rate": 1.3352339486017935e-05, |
| "logits/chosen": -56.43516159057617, |
| "logits/rejected": -60.59318161010742, |
| "logps/chosen": -3860.68896484375, |
| "logps/rejected": -4440.08740234375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 11.916462898254395, |
| "rewards/margins": 30.730871200561523, |
| "rewards/rejected": -18.814407348632812, |
| "step": 2230 |
| }, |
| { |
| "epoch": 1.3357685999701805, |
| "grad_norm": 2.5762432187548256e-09, |
| "learning_rate": 1.3139318647485411e-05, |
| "logits/chosen": -59.685874938964844, |
| "logits/rejected": -63.754478454589844, |
| "logps/chosen": -4016.92333984375, |
| "logps/rejected": -4015.01025390625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 12.248187065124512, |
| "rewards/margins": 28.233264923095703, |
| "rewards/rejected": -15.985074996948242, |
| "step": 2240 |
| }, |
| { |
| "epoch": 1.3417325182644997, |
| "grad_norm": 8.210008672904223e-06, |
| "learning_rate": 1.2927403339922556e-05, |
| "logits/chosen": -58.01857376098633, |
| "logits/rejected": -59.8955078125, |
| "logps/chosen": -3789.346923828125, |
| "logps/rejected": -4025.25927734375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 13.103419303894043, |
| "rewards/margins": 29.54793357849121, |
| "rewards/rejected": -16.44451332092285, |
| "step": 2250 |
| }, |
| { |
| "epoch": 1.347696436558819, |
| "grad_norm": 1.7891031234285037e-07, |
| "learning_rate": 1.2716613315899112e-05, |
| "logits/chosen": -56.45894241333008, |
| "logits/rejected": -61.15498733520508, |
| "logps/chosen": -3575.19189453125, |
| "logps/rejected": -3521.14501953125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 13.165433883666992, |
| "rewards/margins": 26.995624542236328, |
| "rewards/rejected": -13.830190658569336, |
| "step": 2260 |
| }, |
| { |
| "epoch": 1.3536603548531385, |
| "grad_norm": 1.800779045879608e-06, |
| "learning_rate": 1.2506968223097431e-05, |
| "logits/chosen": -55.29785919189453, |
| "logits/rejected": -58.99498748779297, |
| "logps/chosen": -3407.56884765625, |
| "logps/rejected": -3411.91064453125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 12.742193222045898, |
| "rewards/margins": 25.165752410888672, |
| "rewards/rejected": -12.423562049865723, |
| "step": 2270 |
| }, |
| { |
| "epoch": 1.359624273147458, |
| "grad_norm": 2.989838321809657e-06, |
| "learning_rate": 1.229848760248112e-05, |
| "logits/chosen": -57.45177459716797, |
| "logits/rejected": -58.72307205200195, |
| "logps/chosen": -3568.59765625, |
| "logps/rejected": -3394.478515625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 14.099716186523438, |
| "rewards/margins": 27.246999740600586, |
| "rewards/rejected": -13.147282600402832, |
| "step": 2280 |
| }, |
| { |
| "epoch": 1.3655881914417773, |
| "grad_norm": 5.450796152217663e-07, |
| "learning_rate": 1.2091190886473644e-05, |
| "logits/chosen": -56.60668182373047, |
| "logits/rejected": -61.323768615722656, |
| "logps/chosen": -3702.658203125, |
| "logps/rejected": -3887.23193359375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 13.3460693359375, |
| "rewards/margins": 27.915029525756836, |
| "rewards/rejected": -14.568957328796387, |
| "step": 2290 |
| }, |
| { |
| "epoch": 1.3715521097360965, |
| "grad_norm": 6.116428608038404e-08, |
| "learning_rate": 1.1885097397147063e-05, |
| "logits/chosen": -58.4415168762207, |
| "logits/rejected": -63.7178840637207, |
| "logps/chosen": -3674.389892578125, |
| "logps/rejected": -3665.013671875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 12.25750732421875, |
| "rewards/margins": 26.048290252685547, |
| "rewards/rejected": -13.79078197479248, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.377516028030416, |
| "grad_norm": 2.8665971285590786e-07, |
| "learning_rate": 1.1680226344420942e-05, |
| "logits/chosen": -58.254615783691406, |
| "logits/rejected": -62.49680709838867, |
| "logps/chosen": -3836.04052734375, |
| "logps/rejected": -4047.54150390625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 11.538701057434082, |
| "rewards/margins": 26.963558197021484, |
| "rewards/rejected": -15.424860954284668, |
| "step": 2310 |
| }, |
| { |
| "epoch": 1.3834799463247354, |
| "grad_norm": 1.2907316886412445e-06, |
| "learning_rate": 1.147659682427189e-05, |
| "logits/chosen": -58.32746124267578, |
| "logits/rejected": -62.26505661010742, |
| "logps/chosen": -4025.608642578125, |
| "logps/rejected": -4167.8388671875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 11.876118659973145, |
| "rewards/margins": 28.321285247802734, |
| "rewards/rejected": -16.44516944885254, |
| "step": 2320 |
| }, |
| { |
| "epoch": 1.3894438646190548, |
| "grad_norm": 2.1081565648728429e-07, |
| "learning_rate": 1.1274227816953584e-05, |
| "logits/chosen": -54.2985954284668, |
| "logits/rejected": -59.446044921875, |
| "logps/chosen": -3246.630859375, |
| "logps/rejected": -3621.38818359375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 13.192728996276855, |
| "rewards/margins": 27.980316162109375, |
| "rewards/rejected": -14.78758716583252, |
| "step": 2330 |
| }, |
| { |
| "epoch": 1.3954077829133742, |
| "grad_norm": 4.903237282860573e-08, |
| "learning_rate": 1.1073138185227638e-05, |
| "logits/chosen": -58.693580627441406, |
| "logits/rejected": -61.3970832824707, |
| "logps/chosen": -3988.88671875, |
| "logps/rejected": -3924.40625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 14.711004257202148, |
| "rewards/margins": 29.38458251953125, |
| "rewards/rejected": -14.673579216003418, |
| "step": 2340 |
| }, |
| { |
| "epoch": 1.4013717012076934, |
| "grad_norm": 2.6177708605246153e-06, |
| "learning_rate": 1.0873346672605394e-05, |
| "logits/chosen": -54.318634033203125, |
| "logits/rejected": -58.23624801635742, |
| "logps/chosen": -3739.504638671875, |
| "logps/rejected": -4247.2490234375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 13.0569429397583, |
| "rewards/margins": 29.031036376953125, |
| "rewards/rejected": -15.974093437194824, |
| "step": 2350 |
| }, |
| { |
| "epoch": 1.4073356195020128, |
| "grad_norm": 1.0652929631760344e-06, |
| "learning_rate": 1.0674871901600886e-05, |
| "logits/chosen": -57.78883743286133, |
| "logits/rejected": -61.051246643066406, |
| "logps/chosen": -4084.44091796875, |
| "logps/rejected": -4172.6962890625, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 12.95031452178955, |
| "rewards/margins": 26.569732666015625, |
| "rewards/rejected": -13.619417190551758, |
| "step": 2360 |
| }, |
| { |
| "epoch": 1.4132995377963322, |
| "grad_norm": 9.611061102532403e-08, |
| "learning_rate": 1.047773237199497e-05, |
| "logits/chosen": -57.65983963012695, |
| "logits/rejected": -60.987876892089844, |
| "logps/chosen": -3898.65087890625, |
| "logps/rejected": -4089.94287109375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 12.300247192382812, |
| "rewards/margins": 28.2935733795166, |
| "rewards/rejected": -15.993327140808105, |
| "step": 2370 |
| }, |
| { |
| "epoch": 1.4192634560906516, |
| "grad_norm": 1.8469700080459006e-05, |
| "learning_rate": 1.0281946459111022e-05, |
| "logits/chosen": -55.288063049316406, |
| "logits/rejected": -59.08159255981445, |
| "logps/chosen": -3633.615966796875, |
| "logps/rejected": -3830.56982421875, |
| "loss": 0.3076, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 12.845723152160645, |
| "rewards/margins": 25.96260643005371, |
| "rewards/rejected": -13.11688232421875, |
| "step": 2380 |
| }, |
| { |
| "epoch": 1.425227374384971, |
| "grad_norm": 6.146466446921295e-09, |
| "learning_rate": 1.0087532412102171e-05, |
| "logits/chosen": -54.926902770996094, |
| "logits/rejected": -59.40453338623047, |
| "logps/chosen": -3904.94677734375, |
| "logps/rejected": -4362.1982421875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 14.936139106750488, |
| "rewards/margins": 26.841259002685547, |
| "rewards/rejected": -11.905121803283691, |
| "step": 2390 |
| }, |
| { |
| "epoch": 1.4311912926792902, |
| "grad_norm": 2.4089372345770244e-06, |
| "learning_rate": 9.894508352250281e-06, |
| "logits/chosen": -56.74151611328125, |
| "logits/rejected": -59.63338088989258, |
| "logps/chosen": -3725.58642578125, |
| "logps/rejected": -3677.700439453125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 12.33104133605957, |
| "rewards/margins": 25.67470932006836, |
| "rewards/rejected": -13.343668937683105, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.4371552109736097, |
| "grad_norm": 5.329222219074836e-08, |
| "learning_rate": 9.702892271276882e-06, |
| "logits/chosen": -54.703086853027344, |
| "logits/rejected": -58.85982131958008, |
| "logps/chosen": -3626.12158203125, |
| "logps/rejected": -3548.104736328125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 12.935752868652344, |
| "rewards/margins": 26.5059871673584, |
| "rewards/rejected": -13.570233345031738, |
| "step": 2410 |
| }, |
| { |
| "epoch": 1.443119129267929, |
| "grad_norm": 2.663332221430892e-09, |
| "learning_rate": 9.512702029666165e-06, |
| "logits/chosen": -58.17033767700195, |
| "logits/rejected": -60.57352828979492, |
| "logps/chosen": -4204.1767578125, |
| "logps/rejected": -4053.822998046875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 12.819692611694336, |
| "rewards/margins": 27.93819808959961, |
| "rewards/rejected": -15.118505477905273, |
| "step": 2420 |
| }, |
| { |
| "epoch": 1.4490830475622483, |
| "grad_norm": 3.948618143567728e-08, |
| "learning_rate": 9.323955355000213e-06, |
| "logits/chosen": -57.46662521362305, |
| "logits/rejected": -58.235816955566406, |
| "logps/chosen": -3560.56298828125, |
| "logps/rejected": -3385.750732421875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 16.08102035522461, |
| "rewards/margins": 27.879852294921875, |
| "rewards/rejected": -11.798830032348633, |
| "step": 2430 |
| }, |
| { |
| "epoch": 1.455046965856568, |
| "grad_norm": 5.0093671433160125e-08, |
| "learning_rate": 9.136669840306617e-06, |
| "logits/chosen": -54.47039794921875, |
| "logits/rejected": -57.519554138183594, |
| "logps/chosen": -3386.38916015625, |
| "logps/rejected": -3258.2373046875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 15.689840316772461, |
| "rewards/margins": 28.17424964904785, |
| "rewards/rejected": -12.484407424926758, |
| "step": 2440 |
| }, |
| { |
| "epoch": 1.461010884150887, |
| "grad_norm": 4.318402080372152e-09, |
| "learning_rate": 8.950862942418634e-06, |
| "logits/chosen": -56.27559280395508, |
| "logits/rejected": -61.70929718017578, |
| "logps/chosen": -3825.42724609375, |
| "logps/rejected": -3962.33349609375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 14.570101737976074, |
| "rewards/margins": 29.67506980895996, |
| "rewards/rejected": -15.10496711730957, |
| "step": 2450 |
| }, |
| { |
| "epoch": 1.4669748024452065, |
| "grad_norm": 2.655309344845591e-06, |
| "learning_rate": 8.766551980348035e-06, |
| "logits/chosen": -56.446922302246094, |
| "logits/rejected": -59.82023239135742, |
| "logps/chosen": -3807.442138671875, |
| "logps/rejected": -4322.15087890625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 15.840948104858398, |
| "rewards/margins": 27.626562118530273, |
| "rewards/rejected": -11.785615921020508, |
| "step": 2460 |
| }, |
| { |
| "epoch": 1.472938720739526, |
| "grad_norm": 9.323419902784735e-08, |
| "learning_rate": 8.583754133670813e-06, |
| "logits/chosen": -54.80516815185547, |
| "logits/rejected": -58.1927490234375, |
| "logps/chosen": -3539.16064453125, |
| "logps/rejected": -3787.634033203125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 15.8378267288208, |
| "rewards/margins": 27.45003890991211, |
| "rewards/rejected": -11.612213134765625, |
| "step": 2470 |
| }, |
| { |
| "epoch": 1.4789026390338451, |
| "grad_norm": 2.1703629045077832e-06, |
| "learning_rate": 8.402486440925875e-06, |
| "logits/chosen": -55.11442184448242, |
| "logits/rejected": -61.09314727783203, |
| "logps/chosen": -3670.315185546875, |
| "logps/rejected": -4020.887451171875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 11.911749839782715, |
| "rewards/margins": 28.087337493896484, |
| "rewards/rejected": -16.175586700439453, |
| "step": 2480 |
| }, |
| { |
| "epoch": 1.4848665573281645, |
| "grad_norm": 2.716829214932659e-07, |
| "learning_rate": 8.222765798026888e-06, |
| "logits/chosen": -56.46864700317383, |
| "logits/rejected": -61.81829833984375, |
| "logps/chosen": -3806.19580078125, |
| "logps/rejected": -3518.6796875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 13.517992973327637, |
| "rewards/margins": 27.79050636291504, |
| "rewards/rejected": -14.272509574890137, |
| "step": 2490 |
| }, |
| { |
| "epoch": 1.490830475622484, |
| "grad_norm": 1.8604217189022165e-07, |
| "learning_rate": 8.044608956687411e-06, |
| "logits/chosen": -58.91337966918945, |
| "logits/rejected": -61.38121795654297, |
| "logps/chosen": -4016.22509765625, |
| "logps/rejected": -3790.36279296875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 14.345911979675293, |
| "rewards/margins": 27.247661590576172, |
| "rewards/rejected": -12.901748657226562, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.4967943939168034, |
| "grad_norm": 5.806775948968834e-09, |
| "learning_rate": 7.868032522859466e-06, |
| "logits/chosen": -54.46747589111328, |
| "logits/rejected": -58.573265075683594, |
| "logps/chosen": -3881.350341796875, |
| "logps/rejected": -4542.3095703125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 14.201573371887207, |
| "rewards/margins": 29.226207733154297, |
| "rewards/rejected": -15.024637222290039, |
| "step": 2510 |
| }, |
| { |
| "epoch": 1.5027583122111228, |
| "grad_norm": 4.5620687671998894e-08, |
| "learning_rate": 7.69305295518572e-06, |
| "logits/chosen": -59.017433166503906, |
| "logits/rejected": -62.553138732910156, |
| "logps/chosen": -3937.80517578125, |
| "logps/rejected": -4334.24267578125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.289695739746094, |
| "rewards/margins": 28.369304656982422, |
| "rewards/rejected": -18.079607009887695, |
| "step": 2520 |
| }, |
| { |
| "epoch": 1.508722230505442, |
| "grad_norm": 1.7904097648901995e-10, |
| "learning_rate": 7.5196865634653614e-06, |
| "logits/chosen": -56.08478546142578, |
| "logits/rejected": -60.103553771972656, |
| "logps/chosen": -3620.850341796875, |
| "logps/rejected": -3999.59521484375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 12.94933795928955, |
| "rewards/margins": 27.5977840423584, |
| "rewards/rejected": -14.648447036743164, |
| "step": 2530 |
| }, |
| { |
| "epoch": 1.5146861487997616, |
| "grad_norm": 4.987779789189517e-07, |
| "learning_rate": 7.347949507133881e-06, |
| "logits/chosen": -57.28242874145508, |
| "logits/rejected": -61.029945373535156, |
| "logps/chosen": -3839.3828125, |
| "logps/rejected": -3679.442626953125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 14.256357192993164, |
| "rewards/margins": 25.953781127929688, |
| "rewards/rejected": -11.697421073913574, |
| "step": 2540 |
| }, |
| { |
| "epoch": 1.5206500670940808, |
| "grad_norm": 1.5572302336508415e-09, |
| "learning_rate": 7.17785779375684e-06, |
| "logits/chosen": -57.10649871826172, |
| "logits/rejected": -61.82776641845703, |
| "logps/chosen": -3794.711669921875, |
| "logps/rejected": -3995.13916015625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 12.982124328613281, |
| "rewards/margins": 29.219127655029297, |
| "rewards/rejected": -16.237003326416016, |
| "step": 2550 |
| }, |
| { |
| "epoch": 1.5266139853884002, |
| "grad_norm": 0.0003727490548044443, |
| "learning_rate": 7.009427277537828e-06, |
| "logits/chosen": -57.260643005371094, |
| "logits/rejected": -61.28925704956055, |
| "logps/chosen": -3952.393798828125, |
| "logps/rejected": -4409.12548828125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 13.187792778015137, |
| "rewards/margins": 28.366008758544922, |
| "rewards/rejected": -15.178217887878418, |
| "step": 2560 |
| }, |
| { |
| "epoch": 1.5325779036827196, |
| "grad_norm": 3.760019032239548e-11, |
| "learning_rate": 6.842673657840684e-06, |
| "logits/chosen": -58.93050003051758, |
| "logits/rejected": -61.820579528808594, |
| "logps/chosen": -4122.65771484375, |
| "logps/rejected": -4377.88037109375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 13.47004222869873, |
| "rewards/margins": 28.11465072631836, |
| "rewards/rejected": -14.644607543945312, |
| "step": 2570 |
| }, |
| { |
| "epoch": 1.5385418219770388, |
| "grad_norm": 1.33146016878527e-07, |
| "learning_rate": 6.6776124777261585e-06, |
| "logits/chosen": -58.728904724121094, |
| "logits/rejected": -62.30875778198242, |
| "logps/chosen": -3577.420654296875, |
| "logps/rejected": -3622.842529296875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 15.283228874206543, |
| "rewards/margins": 28.720935821533203, |
| "rewards/rejected": -13.437705993652344, |
| "step": 2580 |
| }, |
| { |
| "epoch": 1.5445057402713582, |
| "grad_norm": 1.0433809372045744e-09, |
| "learning_rate": 6.514259122503169e-06, |
| "logits/chosen": -53.648033142089844, |
| "logits/rejected": -58.789031982421875, |
| "logps/chosen": -3476.70947265625, |
| "logps/rejected": -3617.780517578125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 13.440394401550293, |
| "rewards/margins": 26.173446655273438, |
| "rewards/rejected": -12.733052253723145, |
| "step": 2590 |
| }, |
| { |
| "epoch": 1.5504696585656776, |
| "grad_norm": 8.736037448997536e-10, |
| "learning_rate": 6.35262881829472e-06, |
| "logits/chosen": -59.83369827270508, |
| "logits/rejected": -60.867530822753906, |
| "logps/chosen": -3929.28173828125, |
| "logps/rejected": -4287.845703125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 13.677624702453613, |
| "rewards/margins": 29.104511260986328, |
| "rewards/rejected": -15.426889419555664, |
| "step": 2600 |
| }, |
| { |
| "epoch": 1.556433576859997, |
| "grad_norm": 1.1132956956316775e-07, |
| "learning_rate": 6.1927366306186865e-06, |
| "logits/chosen": -58.01744842529297, |
| "logits/rejected": -62.40656661987305, |
| "logps/chosen": -3270.4072265625, |
| "logps/rejected": -2893.09130859375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 13.387075424194336, |
| "rewards/margins": 23.64480972290039, |
| "rewards/rejected": -10.257735252380371, |
| "step": 2610 |
| }, |
| { |
| "epoch": 1.5623974951543165, |
| "grad_norm": 2.7856225415234803e-07, |
| "learning_rate": 6.034597462983563e-06, |
| "logits/chosen": -58.883827209472656, |
| "logits/rejected": -64.26622009277344, |
| "logps/chosen": -3842.31640625, |
| "logps/rejected": -4679.39208984375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 14.385858535766602, |
| "rewards/margins": 28.50748062133789, |
| "rewards/rejected": -14.121622085571289, |
| "step": 2620 |
| }, |
| { |
| "epoch": 1.5683614134486357, |
| "grad_norm": 1.1248067011138119e-07, |
| "learning_rate": 5.878226055499308e-06, |
| "logits/chosen": -60.66514205932617, |
| "logits/rejected": -65.04862976074219, |
| "logps/chosen": -3726.667236328125, |
| "logps/rejected": -4026.20068359375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 13.686662673950195, |
| "rewards/margins": 27.384693145751953, |
| "rewards/rejected": -13.698030471801758, |
| "step": 2630 |
| }, |
| { |
| "epoch": 1.574325331742955, |
| "grad_norm": 5.872105361959257e-07, |
| "learning_rate": 5.72363698350343e-06, |
| "logits/chosen": -57.18046951293945, |
| "logits/rejected": -62.109901428222656, |
| "logps/chosen": -4155.00439453125, |
| "logps/rejected": -4609.71044921875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 14.978708267211914, |
| "rewards/margins": 30.07086181640625, |
| "rewards/rejected": -15.092155456542969, |
| "step": 2640 |
| }, |
| { |
| "epoch": 1.5802892500372745, |
| "grad_norm": 1.7974322474856308e-07, |
| "learning_rate": 5.570844656202415e-06, |
| "logits/chosen": -56.97467803955078, |
| "logits/rejected": -60.40959930419922, |
| "logps/chosen": -3629.501220703125, |
| "logps/rejected": -4034.34765625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 14.481059074401855, |
| "rewards/margins": 26.78216552734375, |
| "rewards/rejected": -12.301109313964844, |
| "step": 2650 |
| }, |
| { |
| "epoch": 1.586253168331594, |
| "grad_norm": 6.639849781109319e-11, |
| "learning_rate": 5.419863315328644e-06, |
| "logits/chosen": -60.214744567871094, |
| "logits/rejected": -63.88579559326172, |
| "logps/chosen": -3973.41015625, |
| "logps/rejected": -4298.8505859375, |
| "loss": 0.0013, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 14.445764541625977, |
| "rewards/margins": 27.013805389404297, |
| "rewards/rejected": -12.56804370880127, |
| "step": 2660 |
| }, |
| { |
| "epoch": 1.5922170866259133, |
| "grad_norm": 1.2835856821880043e-09, |
| "learning_rate": 5.270707033812952e-06, |
| "logits/chosen": -56.302101135253906, |
| "logits/rejected": -61.73807907104492, |
| "logps/chosen": -3850.40234375, |
| "logps/rejected": -3881.409423828125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 15.187263488769531, |
| "rewards/margins": 29.23580551147461, |
| "rewards/rejected": -14.048541069030762, |
| "step": 2670 |
| }, |
| { |
| "epoch": 1.5981810049202325, |
| "grad_norm": 1.238020241878246e-09, |
| "learning_rate": 5.12338971447284e-06, |
| "logits/chosen": -56.36561965942383, |
| "logits/rejected": -60.4167594909668, |
| "logps/chosen": -3432.209716796875, |
| "logps/rejected": -3635.707763671875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 14.321012496948242, |
| "rewards/margins": 28.88069725036621, |
| "rewards/rejected": -14.559684753417969, |
| "step": 2680 |
| }, |
| { |
| "epoch": 1.604144923214552, |
| "grad_norm": 5.2999196142877736e-09, |
| "learning_rate": 4.977925088716673e-06, |
| "logits/chosen": -56.03685760498047, |
| "logits/rejected": -58.90336227416992, |
| "logps/chosen": -3657.60498046875, |
| "logps/rejected": -3899.95751953125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.800675392150879, |
| "rewards/margins": 26.530603408813477, |
| "rewards/rejected": -15.729925155639648, |
| "step": 2690 |
| }, |
| { |
| "epoch": 1.6101088415088713, |
| "grad_norm": 3.040783553773241e-12, |
| "learning_rate": 4.834326715263709e-06, |
| "logits/chosen": -57.493499755859375, |
| "logits/rejected": -61.91980743408203, |
| "logps/chosen": -3821.324951171875, |
| "logps/rejected": -3955.79296875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 15.035219192504883, |
| "rewards/margins": 31.038372039794922, |
| "rewards/rejected": -16.003154754638672, |
| "step": 2700 |
| }, |
| { |
| "epoch": 1.6160727598031905, |
| "grad_norm": 5.735748942470309e-08, |
| "learning_rate": 4.692607978880334e-06, |
| "logits/chosen": -55.83156204223633, |
| "logits/rejected": -59.69164276123047, |
| "logps/chosen": -3918.27734375, |
| "logps/rejected": -3958.736328125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 12.39517879486084, |
| "rewards/margins": 28.416271209716797, |
| "rewards/rejected": -16.02109146118164, |
| "step": 2710 |
| }, |
| { |
| "epoch": 1.6220366780975102, |
| "grad_norm": 1.513937263553089e-06, |
| "learning_rate": 4.552782089132457e-06, |
| "logits/chosen": -53.48059844970703, |
| "logits/rejected": -57.53205490112305, |
| "logps/chosen": -3965.768310546875, |
| "logps/rejected": -4178.44384765625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 13.595451354980469, |
| "rewards/margins": 27.8395938873291, |
| "rewards/rejected": -14.24414348602295, |
| "step": 2720 |
| }, |
| { |
| "epoch": 1.6280005963918294, |
| "grad_norm": 1.2683202839980368e-05, |
| "learning_rate": 4.414862079154258e-06, |
| "logits/chosen": -57.488067626953125, |
| "logits/rejected": -62.132568359375, |
| "logps/chosen": -3671.482421875, |
| "logps/rejected": -3794.837890625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 12.1273832321167, |
| "rewards/margins": 28.243274688720703, |
| "rewards/rejected": -16.115894317626953, |
| "step": 2730 |
| }, |
| { |
| "epoch": 1.6339645146861488, |
| "grad_norm": 4.4300445978251446e-08, |
| "learning_rate": 4.278860804433346e-06, |
| "logits/chosen": -55.68921661376953, |
| "logits/rejected": -59.8105583190918, |
| "logps/chosen": -4055.94580078125, |
| "logps/rejected": -4559.392578125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 13.716020584106445, |
| "rewards/margins": 35.52238845825195, |
| "rewards/rejected": -21.806364059448242, |
| "step": 2740 |
| }, |
| { |
| "epoch": 1.6399284329804682, |
| "grad_norm": 2.3574155960659482e-08, |
| "learning_rate": 4.144790941612561e-06, |
| "logits/chosen": -57.612083435058594, |
| "logits/rejected": -60.485084533691406, |
| "logps/chosen": -3919.98046875, |
| "logps/rejected": -3707.94677734375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 13.938611030578613, |
| "rewards/margins": 27.882177352905273, |
| "rewards/rejected": -13.943568229675293, |
| "step": 2750 |
| }, |
| { |
| "epoch": 1.6458923512747874, |
| "grad_norm": 1.973355574591551e-05, |
| "learning_rate": 4.012664987308326e-06, |
| "logits/chosen": -55.63753128051758, |
| "logits/rejected": -59.27739715576172, |
| "logps/chosen": -3538.80078125, |
| "logps/rejected": -3376.485595703125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 13.036745071411133, |
| "rewards/margins": 26.072357177734375, |
| "rewards/rejected": -13.035612106323242, |
| "step": 2760 |
| }, |
| { |
| "epoch": 1.651856269569107, |
| "grad_norm": 2.2980448655118835e-09, |
| "learning_rate": 3.8824952569458675e-06, |
| "logits/chosen": -56.71158981323242, |
| "logits/rejected": -60.6939811706543, |
| "logps/chosen": -3697.500732421875, |
| "logps/rejected": -4019.428466796875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 12.426570892333984, |
| "rewards/margins": 28.051671981811523, |
| "rewards/rejected": -15.625103950500488, |
| "step": 2770 |
| }, |
| { |
| "epoch": 1.6578201878634262, |
| "grad_norm": 2.817006627964247e-10, |
| "learning_rate": 3.754293883611307e-06, |
| "logits/chosen": -56.885704040527344, |
| "logits/rejected": -60.8448371887207, |
| "logps/chosen": -3909.04736328125, |
| "logps/rejected": -4620.33935546875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 12.772366523742676, |
| "rewards/margins": 29.079452514648438, |
| "rewards/rejected": -16.307085037231445, |
| "step": 2780 |
| }, |
| { |
| "epoch": 1.6637841061577456, |
| "grad_norm": 1.7094137216844274e-09, |
| "learning_rate": 3.628072816920722e-06, |
| "logits/chosen": -55.26677703857422, |
| "logits/rejected": -60.0572395324707, |
| "logps/chosen": -3623.676513671875, |
| "logps/rejected": -3435.266357421875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 13.957403182983398, |
| "rewards/margins": 28.258371353149414, |
| "rewards/rejected": -14.300966262817383, |
| "step": 2790 |
| }, |
| { |
| "epoch": 1.669748024452065, |
| "grad_norm": 1.561394569193908e-09, |
| "learning_rate": 3.5038438219063247e-06, |
| "logits/chosen": -58.582733154296875, |
| "logits/rejected": -61.522300720214844, |
| "logps/chosen": -3925.781982421875, |
| "logps/rejected": -4272.77783203125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 13.12500286102295, |
| "rewards/margins": 27.7889404296875, |
| "rewards/rejected": -14.663938522338867, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.6757119427463842, |
| "grad_norm": 3.270694651291706e-05, |
| "learning_rate": 3.3816184779198566e-06, |
| "logits/chosen": -57.79584884643555, |
| "logits/rejected": -61.09257888793945, |
| "logps/chosen": -3688.829345703125, |
| "logps/rejected": -3789.216064453125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 12.383466720581055, |
| "rewards/margins": 25.65230369567871, |
| "rewards/rejected": -13.268835067749023, |
| "step": 2810 |
| }, |
| { |
| "epoch": 1.6816758610407039, |
| "grad_norm": 0.0009348949533887208, |
| "learning_rate": 3.2614081775532935e-06, |
| "logits/chosen": -57.633819580078125, |
| "logits/rejected": -61.18571090698242, |
| "logps/chosen": -3827.751220703125, |
| "logps/rejected": -4190.44873046875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 11.405567169189453, |
| "rewards/margins": 26.647628784179688, |
| "rewards/rejected": -15.24206256866455, |
| "step": 2820 |
| }, |
| { |
| "epoch": 1.687639779335023, |
| "grad_norm": 2.3347972728515742e-06, |
| "learning_rate": 3.143224125576913e-06, |
| "logits/chosen": -59.698570251464844, |
| "logits/rejected": -63.5836067199707, |
| "logps/chosen": -4135.7724609375, |
| "logps/rejected": -4294.7646484375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.506874084472656, |
| "rewards/margins": 26.867511749267578, |
| "rewards/rejected": -16.360633850097656, |
| "step": 2830 |
| }, |
| { |
| "epoch": 1.6936036976293425, |
| "grad_norm": 3.288195557615836e-06, |
| "learning_rate": 3.0270773378949153e-06, |
| "logits/chosen": -57.68184280395508, |
| "logits/rejected": -60.27643966674805, |
| "logps/chosen": -3976.610595703125, |
| "logps/rejected": -4323.2333984375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 13.625307083129883, |
| "rewards/margins": 30.234777450561523, |
| "rewards/rejected": -16.60947036743164, |
| "step": 2840 |
| }, |
| { |
| "epoch": 1.699567615923662, |
| "grad_norm": 1.8086376840642515e-10, |
| "learning_rate": 2.9129786405186517e-06, |
| "logits/chosen": -58.33183670043945, |
| "logits/rejected": -61.32403564453125, |
| "logps/chosen": -3671.446533203125, |
| "logps/rejected": -3879.734375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 13.45142936706543, |
| "rewards/margins": 27.773706436157227, |
| "rewards/rejected": -14.32227611541748, |
| "step": 2850 |
| }, |
| { |
| "epoch": 1.705531534217981, |
| "grad_norm": 5.313737716505784e-08, |
| "learning_rate": 2.8009386685574873e-06, |
| "logits/chosen": -58.4184684753418, |
| "logits/rejected": -64.02459716796875, |
| "logps/chosen": -3511.401611328125, |
| "logps/rejected": -3506.68798828125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 12.209698677062988, |
| "rewards/margins": 27.310054779052734, |
| "rewards/rejected": -15.10035514831543, |
| "step": 2860 |
| }, |
| { |
| "epoch": 1.7114954525123007, |
| "grad_norm": 2.9329979156500485e-07, |
| "learning_rate": 2.6909678652275617e-06, |
| "logits/chosen": -54.92981719970703, |
| "logits/rejected": -63.78539276123047, |
| "logps/chosen": -3624.64453125, |
| "logps/rejected": -4150.95263671875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 13.3597993850708, |
| "rewards/margins": 32.095314025878906, |
| "rewards/rejected": -18.735517501831055, |
| "step": 2870 |
| }, |
| { |
| "epoch": 1.71745937080662, |
| "grad_norm": 2.407121968417414e-08, |
| "learning_rate": 2.583076480878352e-06, |
| "logits/chosen": -55.66535568237305, |
| "logits/rejected": -59.855445861816406, |
| "logps/chosen": -3529.068359375, |
| "logps/rejected": -3593.259765625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 14.202707290649414, |
| "rewards/margins": 27.2900333404541, |
| "rewards/rejected": -13.08732795715332, |
| "step": 2880 |
| }, |
| { |
| "epoch": 1.7234232891009393, |
| "grad_norm": 5.434892091926713e-09, |
| "learning_rate": 2.477274572037236e-06, |
| "logits/chosen": -59.2020263671875, |
| "logits/rejected": -62.6593132019043, |
| "logps/chosen": -4125.734375, |
| "logps/rejected": -4534.7890625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 11.742546081542969, |
| "rewards/margins": 28.544530868530273, |
| "rewards/rejected": -16.801984786987305, |
| "step": 2890 |
| }, |
| { |
| "epoch": 1.7293872073952588, |
| "grad_norm": 1.486068906819682e-11, |
| "learning_rate": 2.3735720004721325e-06, |
| "logits/chosen": -54.690345764160156, |
| "logits/rejected": -58.92433547973633, |
| "logps/chosen": -3601.94091796875, |
| "logps/rejected": -3842.4375, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 12.382122039794922, |
| "rewards/margins": 26.631744384765625, |
| "rewards/rejected": -14.249621391296387, |
| "step": 2900 |
| }, |
| { |
| "epoch": 1.735351125689578, |
| "grad_norm": 1.4909307122223936e-10, |
| "learning_rate": 2.2719784322722954e-06, |
| "logits/chosen": -58.35798263549805, |
| "logits/rejected": -61.391815185546875, |
| "logps/chosen": -3863.544189453125, |
| "logps/rejected": -3775.06787109375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 12.62255859375, |
| "rewards/margins": 26.525253295898438, |
| "rewards/rejected": -13.902692794799805, |
| "step": 2910 |
| }, |
| { |
| "epoch": 1.7413150439838976, |
| "grad_norm": 1.7777512766770087e-05, |
| "learning_rate": 2.172503336947318e-06, |
| "logits/chosen": -58.63573455810547, |
| "logits/rejected": -61.71209716796875, |
| "logps/chosen": -3985.02197265625, |
| "logps/rejected": -4420.2685546875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 14.010174751281738, |
| "rewards/margins": 28.709774017333984, |
| "rewards/rejected": -14.699602127075195, |
| "step": 2920 |
| }, |
| { |
| "epoch": 1.7472789622782168, |
| "grad_norm": 3.4102259860446793e-07, |
| "learning_rate": 2.0751559865445137e-06, |
| "logits/chosen": -55.8213005065918, |
| "logits/rejected": -58.88740921020508, |
| "logps/chosen": -3726.633544921875, |
| "logps/rejected": -3584.02880859375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 14.5889310836792, |
| "rewards/margins": 27.472326278686523, |
| "rewards/rejected": -12.883394241333008, |
| "step": 2930 |
| }, |
| { |
| "epoch": 1.7532428805725362, |
| "grad_norm": 3.0110019366702545e-08, |
| "learning_rate": 1.9799454547846403e-06, |
| "logits/chosen": -58.080291748046875, |
| "logits/rejected": -65.03736114501953, |
| "logps/chosen": -3850.731689453125, |
| "logps/rejected": -3925.19140625, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.993995666503906, |
| "rewards/margins": 30.477624893188477, |
| "rewards/rejected": -19.483631134033203, |
| "step": 2940 |
| }, |
| { |
| "epoch": 1.7592067988668556, |
| "grad_norm": 8.025313746884422e-11, |
| "learning_rate": 1.8868806162161745e-06, |
| "logits/chosen": -55.89226150512695, |
| "logits/rejected": -59.2379150390625, |
| "logps/chosen": -3627.599609375, |
| "logps/rejected": -3741.21728515625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 13.812711715698242, |
| "rewards/margins": 27.617528915405273, |
| "rewards/rejected": -13.804819107055664, |
| "step": 2950 |
| }, |
| { |
| "epoch": 1.7651707171611748, |
| "grad_norm": 9.478714702026991e-08, |
| "learning_rate": 1.7959701453880845e-06, |
| "logits/chosen": -53.85209274291992, |
| "logits/rejected": -58.788795471191406, |
| "logps/chosen": -3635.917236328125, |
| "logps/rejected": -3698.177734375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 13.880119323730469, |
| "rewards/margins": 27.0487117767334, |
| "rewards/rejected": -13.16859245300293, |
| "step": 2960 |
| }, |
| { |
| "epoch": 1.7711346354554942, |
| "grad_norm": 1.0980542128891102e-06, |
| "learning_rate": 1.7072225160412987e-06, |
| "logits/chosen": -55.5919189453125, |
| "logits/rejected": -59.72821044921875, |
| "logps/chosen": -3429.30859375, |
| "logps/rejected": -3813.98291015625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 13.354362487792969, |
| "rewards/margins": 26.6251277923584, |
| "rewards/rejected": -13.270769119262695, |
| "step": 2970 |
| }, |
| { |
| "epoch": 1.7770985537498136, |
| "grad_norm": 8.783963556524554e-10, |
| "learning_rate": 1.6206460003188484e-06, |
| "logits/chosen": -60.265892028808594, |
| "logits/rejected": -63.906089782714844, |
| "logps/chosen": -4072.217529296875, |
| "logps/rejected": -4386.146484375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 11.840473175048828, |
| "rewards/margins": 27.794742584228516, |
| "rewards/rejected": -15.954269409179688, |
| "step": 2980 |
| }, |
| { |
| "epoch": 1.783062472044133, |
| "grad_norm": 2.523800057829817e-09, |
| "learning_rate": 1.536248667994855e-06, |
| "logits/chosen": -58.744789123535156, |
| "logits/rejected": -61.5314826965332, |
| "logps/chosen": -3976.684814453125, |
| "logps/rejected": -4438.0166015625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 12.870981216430664, |
| "rewards/margins": 27.51932716369629, |
| "rewards/rejected": -14.648345947265625, |
| "step": 2990 |
| }, |
| { |
| "epoch": 1.7890263903384525, |
| "grad_norm": 5.33410729985917e-06, |
| "learning_rate": 1.4540383857223255e-06, |
| "logits/chosen": -56.88322830200195, |
| "logits/rejected": -61.4012565612793, |
| "logps/chosen": -3691.760498046875, |
| "logps/rejected": -3509.0, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 14.339391708374023, |
| "rewards/margins": 26.1422119140625, |
| "rewards/rejected": -11.802818298339844, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.7949903086327716, |
| "grad_norm": 0.003620662959292531, |
| "learning_rate": 1.3740228162999164e-06, |
| "logits/chosen": -55.299964904785156, |
| "logits/rejected": -60.9326171875, |
| "logps/chosen": -3670.139892578125, |
| "logps/rejected": -4088.413330078125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 12.467697143554688, |
| "rewards/margins": 26.333303451538086, |
| "rewards/rejected": -13.865605354309082, |
| "step": 3010 |
| }, |
| { |
| "epoch": 1.800954226927091, |
| "grad_norm": 1.163293128492171e-09, |
| "learning_rate": 1.2962094179576723e-06, |
| "logits/chosen": -58.49831008911133, |
| "logits/rejected": -64.70086669921875, |
| "logps/chosen": -3779.00732421875, |
| "logps/rejected": -4333.802734375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 10.059266090393066, |
| "rewards/margins": 27.809616088867188, |
| "rewards/rejected": -17.750350952148438, |
| "step": 3020 |
| }, |
| { |
| "epoch": 1.8069181452214105, |
| "grad_norm": 1.3129453968474536e-08, |
| "learning_rate": 1.2206054436618624e-06, |
| "logits/chosen": -54.48987579345703, |
| "logits/rejected": -60.60657501220703, |
| "logps/chosen": -3450.112060546875, |
| "logps/rejected": -3492.62353515625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 13.539463996887207, |
| "rewards/margins": 27.055255889892578, |
| "rewards/rejected": -13.515789985656738, |
| "step": 3030 |
| }, |
| { |
| "epoch": 1.8128820635157297, |
| "grad_norm": 1.3139845123077976e-06, |
| "learning_rate": 1.1472179404389133e-06, |
| "logits/chosen": -57.91154098510742, |
| "logits/rejected": -60.720306396484375, |
| "logps/chosen": -3706.28857421875, |
| "logps/rejected": -3608.96484375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 12.64875316619873, |
| "rewards/margins": 25.565021514892578, |
| "rewards/rejected": -12.916269302368164, |
| "step": 3040 |
| }, |
| { |
| "epoch": 1.8188459818100493, |
| "grad_norm": 1.7036876798215417e-08, |
| "learning_rate": 1.0760537487185807e-06, |
| "logits/chosen": -55.9576301574707, |
| "logits/rejected": -60.521888732910156, |
| "logps/chosen": -3810.43359375, |
| "logps/rejected": -3926.3671875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 13.514657974243164, |
| "rewards/margins": 28.64280128479004, |
| "rewards/rejected": -15.128148078918457, |
| "step": 3050 |
| }, |
| { |
| "epoch": 1.8248099001043685, |
| "grad_norm": 4.0257355067296885e-06, |
| "learning_rate": 1.0071195016963381e-06, |
| "logits/chosen": -62.52231979370117, |
| "logits/rejected": -66.21368408203125, |
| "logps/chosen": -4432.63916015625, |
| "logps/rejected": -4683.30517578125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 11.339556694030762, |
| "rewards/margins": 29.099838256835938, |
| "rewards/rejected": -17.76028060913086, |
| "step": 3060 |
| }, |
| { |
| "epoch": 1.830773818398688, |
| "grad_norm": 6.330567181578317e-09, |
| "learning_rate": 9.404216247151043e-07, |
| "logits/chosen": -52.628440856933594, |
| "logits/rejected": -57.48138427734375, |
| "logps/chosen": -3616.740966796875, |
| "logps/rejected": -3667.90234375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 12.859712600708008, |
| "rewards/margins": 28.531396865844727, |
| "rewards/rejected": -15.671684265136719, |
| "step": 3070 |
| }, |
| { |
| "epoch": 1.8367377366930073, |
| "grad_norm": 7.075045260762636e-08, |
| "learning_rate": 8.75966334666345e-07, |
| "logits/chosen": -56.22023391723633, |
| "logits/rejected": -59.01957321166992, |
| "logps/chosen": -3757.162841796875, |
| "logps/rejected": -3768.07568359375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 11.878870964050293, |
| "rewards/margins": 27.17599105834961, |
| "rewards/rejected": -15.297121047973633, |
| "step": 3080 |
| }, |
| { |
| "epoch": 1.8427016549873265, |
| "grad_norm": 1.6106888045541723e-10, |
| "learning_rate": 8.137596394105884e-07, |
| "logits/chosen": -57.78483200073242, |
| "logits/rejected": -62.688690185546875, |
| "logps/chosen": -3791.12255859375, |
| "logps/rejected": -4281.9208984375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 12.808479309082031, |
| "rewards/margins": 31.255359649658203, |
| "rewards/rejected": -18.446876525878906, |
| "step": 3090 |
| }, |
| { |
| "epoch": 1.8486655732816462, |
| "grad_norm": 1.2802472623696382e-11, |
| "learning_rate": 7.538073372174243e-07, |
| "logits/chosen": -54.56230926513672, |
| "logits/rejected": -58.21129608154297, |
| "logps/chosen": -3462.29345703125, |
| "logps/rejected": -3649.39794921875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 11.672677993774414, |
| "rewards/margins": 26.28127670288086, |
| "rewards/rejected": -14.608599662780762, |
| "step": 3100 |
| }, |
| { |
| "epoch": 1.8546294915759653, |
| "grad_norm": 1.6649872452978443e-11, |
| "learning_rate": 6.961150162250768e-07, |
| "logits/chosen": -53.14113235473633, |
| "logits/rejected": -58.1319694519043, |
| "logps/chosen": -3658.533203125, |
| "logps/rejected": -3746.419189453125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 12.092714309692383, |
| "rewards/margins": 26.7106990814209, |
| "rewards/rejected": -14.617985725402832, |
| "step": 3110 |
| }, |
| { |
| "epoch": 1.8605934098702848, |
| "grad_norm": 2.5056786398636177e-05, |
| "learning_rate": 6.406880539195192e-07, |
| "logits/chosen": -57.97718048095703, |
| "logits/rejected": -60.9071044921875, |
| "logps/chosen": -4105.29736328125, |
| "logps/rejected": -4301.6162109375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 14.045524597167969, |
| "rewards/margins": 30.137821197509766, |
| "rewards/rejected": -16.092296600341797, |
| "step": 3120 |
| }, |
| { |
| "epoch": 1.8665573281646042, |
| "grad_norm": 1.519443060260528e-08, |
| "learning_rate": 5.875316166332301e-07, |
| "logits/chosen": -52.88419723510742, |
| "logits/rejected": -56.966552734375, |
| "logps/chosen": -3373.383544921875, |
| "logps/rejected": -3637.07861328125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 11.798542022705078, |
| "rewards/margins": 27.759979248046875, |
| "rewards/rejected": -15.961441040039062, |
| "step": 3130 |
| }, |
| { |
| "epoch": 1.8725212464589234, |
| "grad_norm": 1.0141298467658544e-08, |
| "learning_rate": 5.366506590636728e-07, |
| "logits/chosen": -56.66168975830078, |
| "logits/rejected": -60.23418426513672, |
| "logps/chosen": -3948.93212890625, |
| "logps/rejected": -4152.4853515625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 13.425600051879883, |
| "rewards/margins": 26.627605438232422, |
| "rewards/rejected": -13.202006340026855, |
| "step": 3140 |
| }, |
| { |
| "epoch": 1.878485164753243, |
| "grad_norm": 1.0312219522745636e-08, |
| "learning_rate": 4.880499238114289e-07, |
| "logits/chosen": -56.00712203979492, |
| "logits/rejected": -60.405303955078125, |
| "logps/chosen": -3732.51220703125, |
| "logps/rejected": -4072.91943359375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 12.824224472045898, |
| "rewards/margins": 29.8277587890625, |
| "rewards/rejected": -17.00353240966797, |
| "step": 3150 |
| }, |
| { |
| "epoch": 1.8844490830475622, |
| "grad_norm": 3.670846737691136e-08, |
| "learning_rate": 4.4173394093816323e-07, |
| "logits/chosen": -57.95183181762695, |
| "logits/rejected": -62.4155158996582, |
| "logps/chosen": -4040.39990234375, |
| "logps/rejected": -4227.3154296875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 14.566617012023926, |
| "rewards/margins": 30.018875122070312, |
| "rewards/rejected": -15.452255249023438, |
| "step": 3160 |
| }, |
| { |
| "epoch": 1.8904130013418816, |
| "grad_norm": 7.669514729968796e-07, |
| "learning_rate": 3.977070275443889e-07, |
| "logits/chosen": -54.29924774169922, |
| "logits/rejected": -58.9632682800293, |
| "logps/chosen": -3373.657470703125, |
| "logps/rejected": -3459.740966796875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 11.414060592651367, |
| "rewards/margins": 27.17255210876465, |
| "rewards/rejected": -15.758489608764648, |
| "step": 3170 |
| }, |
| { |
| "epoch": 1.896376919636201, |
| "grad_norm": 7.890444742031377e-09, |
| "learning_rate": 3.5597328736704515e-07, |
| "logits/chosen": -57.19881057739258, |
| "logits/rejected": -59.535736083984375, |
| "logps/chosen": -3508.376220703125, |
| "logps/rejected": -4312.44189453125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 13.423650741577148, |
| "rewards/margins": 30.402755737304688, |
| "rewards/rejected": -16.97910499572754, |
| "step": 3180 |
| }, |
| { |
| "epoch": 1.9023408379305202, |
| "grad_norm": 0.0008170054643414915, |
| "learning_rate": 3.1653661039700856e-07, |
| "logits/chosen": -53.190582275390625, |
| "logits/rejected": -59.55742263793945, |
| "logps/chosen": -3487.789794921875, |
| "logps/rejected": -3961.377685546875, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 13.425531387329102, |
| "rewards/margins": 26.997051239013672, |
| "rewards/rejected": -13.571520805358887, |
| "step": 3190 |
| }, |
| { |
| "epoch": 1.9083047562248399, |
| "grad_norm": 1.0537237926655507e-07, |
| "learning_rate": 2.794006725165055e-07, |
| "logits/chosen": -58.54961395263672, |
| "logits/rejected": -61.74705123901367, |
| "logps/chosen": -3884.13525390625, |
| "logps/rejected": -3678.408935546875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 13.533299446105957, |
| "rewards/margins": 25.77750015258789, |
| "rewards/rejected": -12.244199752807617, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.914268674519159, |
| "grad_norm": 3.401453625428985e-07, |
| "learning_rate": 2.4456893515647507e-07, |
| "logits/chosen": -55.8200569152832, |
| "logits/rejected": -60.309791564941406, |
| "logps/chosen": -3641.48876953125, |
| "logps/rejected": -4029.09130859375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 12.986165046691895, |
| "rewards/margins": 28.325210571289062, |
| "rewards/rejected": -15.339044570922852, |
| "step": 3210 |
| }, |
| { |
| "epoch": 1.9202325928134785, |
| "grad_norm": 7.95227883543248e-09, |
| "learning_rate": 2.1204464497393828e-07, |
| "logits/chosen": -58.28154754638672, |
| "logits/rejected": -63.397918701171875, |
| "logps/chosen": -3738.774169921875, |
| "logps/rejected": -4127.396484375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 12.601153373718262, |
| "rewards/margins": 32.9376106262207, |
| "rewards/rejected": -20.33645248413086, |
| "step": 3220 |
| }, |
| { |
| "epoch": 1.9261965111077979, |
| "grad_norm": 5.5718683142913505e-06, |
| "learning_rate": 1.818308335493707e-07, |
| "logits/chosen": -57.85089874267578, |
| "logits/rejected": -62.124717712402344, |
| "logps/chosen": -3759.594482421875, |
| "logps/rejected": -4074.05859375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 12.022012710571289, |
| "rewards/margins": 27.768798828125, |
| "rewards/rejected": -15.746786117553711, |
| "step": 3230 |
| }, |
| { |
| "epoch": 1.932160429402117, |
| "grad_norm": 2.9493682518477726e-07, |
| "learning_rate": 1.539303171041423e-07, |
| "logits/chosen": -56.495628356933594, |
| "logits/rejected": -59.803489685058594, |
| "logps/chosen": -3937.475341796875, |
| "logps/rejected": -4117.28955078125, |
| "loss": 0.0181, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": 13.402850151062012, |
| "rewards/margins": 29.56199073791504, |
| "rewards/rejected": -16.159137725830078, |
| "step": 3240 |
| }, |
| { |
| "epoch": 1.9381243476964367, |
| "grad_norm": 2.0169439451933613e-09, |
| "learning_rate": 1.2834569623800806e-07, |
| "logits/chosen": -56.14719772338867, |
| "logits/rejected": -61.06328201293945, |
| "logps/chosen": -3900.921142578125, |
| "logps/rejected": -3950.700439453125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 11.294220924377441, |
| "rewards/margins": 27.905467987060547, |
| "rewards/rejected": -16.611248016357422, |
| "step": 3250 |
| }, |
| { |
| "epoch": 1.944088265990756, |
| "grad_norm": 2.454358383197075e-10, |
| "learning_rate": 1.0507935568670469e-07, |
| "logits/chosen": -59.91279983520508, |
| "logits/rejected": -64.65019226074219, |
| "logps/chosen": -4057.24365234375, |
| "logps/rejected": -4658.7998046875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 13.831171035766602, |
| "rewards/margins": 30.55777359008789, |
| "rewards/rejected": -16.726600646972656, |
| "step": 3260 |
| }, |
| { |
| "epoch": 1.9500521842850753, |
| "grad_norm": 6.98909570928663e-05, |
| "learning_rate": 8.413346409967548e-08, |
| "logits/chosen": -57.639564514160156, |
| "logits/rejected": -60.486106872558594, |
| "logps/chosen": -3805.05908203125, |
| "logps/rejected": -3502.89208984375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 13.168159484863281, |
| "rewards/margins": 26.42257308959961, |
| "rewards/rejected": -13.254412651062012, |
| "step": 3270 |
| }, |
| { |
| "epoch": 1.9560161025793947, |
| "grad_norm": 9.799998590409587e-10, |
| "learning_rate": 6.5509973837935e-08, |
| "logits/chosen": -56.7869987487793, |
| "logits/rejected": -63.3160285949707, |
| "logps/chosen": -3626.40771484375, |
| "logps/rejected": -4573.03466796875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 14.337489128112793, |
| "rewards/margins": 31.131816864013672, |
| "rewards/rejected": -16.794330596923828, |
| "step": 3280 |
| }, |
| { |
| "epoch": 1.961980020873714, |
| "grad_norm": 8.010190867935307e-06, |
| "learning_rate": 4.921062079207839e-08, |
| "logits/chosen": -58.120140075683594, |
| "logits/rejected": -62.119712829589844, |
| "logps/chosen": -3722.432861328125, |
| "logps/rejected": -4000.43701171875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 15.0454740524292, |
| "rewards/margins": 28.300750732421875, |
| "rewards/rejected": -13.255276679992676, |
| "step": 3290 |
| }, |
| { |
| "epoch": 1.9679439391680336, |
| "grad_norm": 0.012528502382338047, |
| "learning_rate": 3.5236924220494186e-08, |
| "logits/chosen": -57.49330520629883, |
| "logits/rejected": -60.7618522644043, |
| "logps/chosen": -4048.438232421875, |
| "logps/rejected": -4347.9658203125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 13.4490966796875, |
| "rewards/margins": 29.223918914794922, |
| "rewards/rejected": -15.774820327758789, |
| "step": 3300 |
| }, |
| { |
| "epoch": 1.9739078574623528, |
| "grad_norm": 5.5174933066837184e-08, |
| "learning_rate": 2.3590186607733154e-08, |
| "logits/chosen": -57.35243606567383, |
| "logits/rejected": -63.4393310546875, |
| "logps/chosen": -3737.04150390625, |
| "logps/rejected": -4001.02978515625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 14.624621391296387, |
| "rewards/margins": 27.510913848876953, |
| "rewards/rejected": -12.886293411254883, |
| "step": 3310 |
| }, |
| { |
| "epoch": 1.9798717757566722, |
| "grad_norm": 2.4484758665010986e-10, |
| "learning_rate": 1.4271493543133174e-08, |
| "logits/chosen": -59.93109130859375, |
| "logits/rejected": -63.264732360839844, |
| "logps/chosen": -3958.082763671875, |
| "logps/rejected": -4254.8232421875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 13.18006420135498, |
| "rewards/margins": 28.5799503326416, |
| "rewards/rejected": -15.399887084960938, |
| "step": 3320 |
| }, |
| { |
| "epoch": 1.9858356940509916, |
| "grad_norm": 4.756313121134781e-09, |
| "learning_rate": 7.281713619605723e-09, |
| "logits/chosen": -56.019737243652344, |
| "logits/rejected": -59.69663619995117, |
| "logps/chosen": -4050.05908203125, |
| "logps/rejected": -4002.97802734375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 13.66505241394043, |
| "rewards/margins": 27.011245727539062, |
| "rewards/rejected": -13.346193313598633, |
| "step": 3330 |
| }, |
| { |
| "epoch": 1.9917996123453108, |
| "grad_norm": 1.108175638364628e-05, |
| "learning_rate": 2.6214983526867686e-09, |
| "logits/chosen": -56.985069274902344, |
| "logits/rejected": -62.2716178894043, |
| "logps/chosen": -3683.37255859375, |
| "logps/rejected": -3870.92041015625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 13.052160263061523, |
| "rewards/margins": 27.678844451904297, |
| "rewards/rejected": -14.626681327819824, |
| "step": 3340 |
| }, |
| { |
| "epoch": 1.9977635306396302, |
| "grad_norm": 2.601581456929125e-07, |
| "learning_rate": 2.912821198075566e-10, |
| "logits/chosen": -56.11452102661133, |
| "logits/rejected": -60.453453063964844, |
| "logps/chosen": -3346.564453125, |
| "logps/rejected": -3288.348388671875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 12.876147270202637, |
| "rewards/margins": 26.675273895263672, |
| "rewards/rejected": -13.799127578735352, |
| "step": 3350 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 3354, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 200, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|