{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.014992503748126, "eval_steps": 500, "global_step": 339, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0029985007496251873, "grad_norm": 98.0, "learning_rate": 0.0, "logits/chosen": -1.517578125, "logits/rejected": -1.533203125, "logps/chosen": -767.25, "logps/rejected": -836.75, "loss": 1.3203125, "nll_loss": 1.28076171875, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.005997001499250375, "grad_norm": 119.0, "learning_rate": 7.692307692307694e-07, "logits/chosen": -1.689453125, "logits/rejected": -1.634765625, "logps/chosen": -1800.0, "logps/rejected": -1910.0, "loss": 1.49609375, "nll_loss": 1.9921875, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 2 }, { "epoch": 0.008995502248875561, "grad_norm": 84.5, "learning_rate": 1.5384615384615387e-06, "logits/chosen": -1.71484375, "logits/rejected": -1.708984375, "logps/chosen": -1291.5, "logps/rejected": -1344.75, "loss": 1.4296875, "nll_loss": 1.6005859375, "rewards/accuracies": 0.25, "rewards/chosen": -0.0390625, "rewards/margins": -0.1875, "rewards/rejected": 0.1484375, "step": 3 }, { "epoch": 0.01199400299850075, "grad_norm": 52.0, "learning_rate": 2.307692307692308e-06, "logits/chosen": -1.529296875, "logits/rejected": -1.54296875, "logps/chosen": -613.5, "logps/rejected": -454.0, "loss": 1.189453125, "nll_loss": 0.8017578125, "rewards/accuracies": 0.375, "rewards/chosen": 0.109375, "rewards/margins": 0.078125, "rewards/rejected": 0.03125, "step": 4 }, { "epoch": 0.014992503748125937, "grad_norm": 84.0, "learning_rate": 3.0769230769230774e-06, "logits/chosen": -1.48828125, "logits/rejected": -1.607421875, "logps/chosen": -729.25, "logps/rejected": -897.5, "loss": 1.251953125, "nll_loss": 1.33984375, "rewards/accuracies": 0.625, "rewards/chosen": 0.130859375, "rewards/margins": 0.451171875, "rewards/rejected": -0.3203125, "step": 5 }, { "epoch": 0.017991004497751123, "grad_norm": 83.5, "learning_rate": 3.846153846153847e-06, "logits/chosen": -1.712890625, "logits/rejected": -1.6484375, "logps/chosen": -1176.0, "logps/rejected": -1174.0, "loss": 1.498046875, "nll_loss": 1.806640625, "rewards/accuracies": 0.5, "rewards/chosen": 0.1484375, "rewards/margins": -0.4453125, "rewards/rejected": 0.59375, "step": 6 }, { "epoch": 0.020989505247376312, "grad_norm": 110.0, "learning_rate": 4.615384615384616e-06, "logits/chosen": -1.751953125, "logits/rejected": -1.681640625, "logps/chosen": -1940.0, "logps/rejected": -1952.0, "loss": 1.505859375, "nll_loss": 2.5390625, "rewards/accuracies": 0.375, "rewards/chosen": 0.0078125, "rewards/margins": 0.7763671875, "rewards/rejected": -0.7685546875, "step": 7 }, { "epoch": 0.0239880059970015, "grad_norm": 113.5, "learning_rate": 5.384615384615385e-06, "logits/chosen": -1.564453125, "logits/rejected": -1.61328125, "logps/chosen": -1705.0, "logps/rejected": -1841.0, "loss": 1.314453125, "nll_loss": 1.87841796875, "rewards/accuracies": 0.75, "rewards/chosen": 0.46875, "rewards/margins": 1.078125, "rewards/rejected": -0.609375, "step": 8 }, { "epoch": 0.026986506746626688, "grad_norm": 103.0, "learning_rate": 6.153846153846155e-06, "logits/chosen": -1.748046875, "logits/rejected": -1.69140625, "logps/chosen": -1655.5, "logps/rejected": -1663.75, "loss": 1.427734375, "nll_loss": 1.86572265625, "rewards/accuracies": 0.375, "rewards/chosen": 0.0703125, "rewards/margins": 0.2421875, "rewards/rejected": -0.171875, "step": 9 }, { "epoch": 0.029985007496251874, "grad_norm": 77.0, "learning_rate": 6.923076923076923e-06, "logits/chosen": -1.65625, "logits/rejected": -1.619140625, "logps/chosen": -1045.5, "logps/rejected": -1078.375, "loss": 1.365234375, "nll_loss": 1.49609375, "rewards/accuracies": 0.625, "rewards/chosen": 0.26171875, "rewards/margins": 0.02734375, "rewards/rejected": 0.234375, "step": 10 }, { "epoch": 0.03298350824587706, "grad_norm": 87.5, "learning_rate": 7.692307692307694e-06, "logits/chosen": -1.578125, "logits/rejected": -1.595703125, "logps/chosen": -1101.0, "logps/rejected": -1194.5, "loss": 1.48828125, "nll_loss": 1.697265625, "rewards/accuracies": 0.5, "rewards/chosen": 0.328125, "rewards/margins": -0.4072265625, "rewards/rejected": 0.7353515625, "step": 11 }, { "epoch": 0.035982008995502246, "grad_norm": 92.5, "learning_rate": 8.461538461538462e-06, "logits/chosen": -1.67578125, "logits/rejected": -1.646484375, "logps/chosen": -871.5, "logps/rejected": -1011.0, "loss": 1.291015625, "nll_loss": 1.615234375, "rewards/accuracies": 0.75, "rewards/chosen": 0.744140625, "rewards/margins": 0.650390625, "rewards/rejected": 0.09375, "step": 12 }, { "epoch": 0.038980509745127435, "grad_norm": 66.0, "learning_rate": 9.230769230769232e-06, "logits/chosen": -1.681640625, "logits/rejected": -1.6484375, "logps/chosen": -1007.625, "logps/rejected": -1010.53125, "loss": 1.34375, "nll_loss": 1.3955078125, "rewards/accuracies": 0.625, "rewards/chosen": 0.408203125, "rewards/margins": -0.0322265625, "rewards/rejected": 0.4404296875, "step": 13 }, { "epoch": 0.041979010494752625, "grad_norm": 79.5, "learning_rate": 1e-05, "logits/chosen": -1.744140625, "logits/rejected": -1.69921875, "logps/chosen": -1097.0, "logps/rejected": -1043.25, "loss": 1.30859375, "nll_loss": 1.47265625, "rewards/accuracies": 0.5, "rewards/chosen": 0.62890625, "rewards/margins": 0.30859375, "rewards/rejected": 0.3203125, "step": 14 }, { "epoch": 0.044977511244377814, "grad_norm": 98.0, "learning_rate": 9.999896390730872e-06, "logits/chosen": -1.732421875, "logits/rejected": -1.666015625, "logps/chosen": -1499.0, "logps/rejected": -1722.0, "loss": 1.283203125, "nll_loss": 1.810546875, "rewards/accuracies": 0.625, "rewards/chosen": 1.1875, "rewards/margins": 1.0, "rewards/rejected": 0.1875, "step": 15 }, { "epoch": 0.047976011994003, "grad_norm": 104.5, "learning_rate": 9.99958556721744e-06, "logits/chosen": -1.6171875, "logits/rejected": -1.638671875, "logps/chosen": -1740.0, "logps/rejected": -1810.0, "loss": 1.314453125, "nll_loss": 2.267578125, "rewards/accuracies": 0.875, "rewards/chosen": 1.13671875, "rewards/margins": 1.53125, "rewards/rejected": -0.39453125, "step": 16 }, { "epoch": 0.050974512743628186, "grad_norm": 99.5, "learning_rate": 9.99906754234138e-06, "logits/chosen": -1.6953125, "logits/rejected": -1.662109375, "logps/chosen": -1371.75, "logps/rejected": -1211.375, "loss": 1.1796875, "nll_loss": 1.88818359375, "rewards/accuracies": 1.0, "rewards/chosen": 0.482421875, "rewards/margins": 1.986328125, "rewards/rejected": -1.50341796875, "step": 17 }, { "epoch": 0.053973013493253376, "grad_norm": 84.0, "learning_rate": 9.998342337571566e-06, "logits/chosen": -1.689453125, "logits/rejected": -1.62890625, "logps/chosen": -1658.0, "logps/rejected": -1794.0, "loss": 1.21484375, "nll_loss": 2.17578125, "rewards/accuracies": 1.0, "rewards/chosen": 0.765625, "rewards/margins": 2.39453125, "rewards/rejected": -1.62890625, "step": 18 }, { "epoch": 0.05697151424287856, "grad_norm": 98.0, "learning_rate": 9.997409982963173e-06, "logits/chosen": -1.658203125, "logits/rejected": -1.62109375, "logps/chosen": -1526.0, "logps/rejected": -1760.0, "loss": 1.23828125, "nll_loss": 1.951171875, "rewards/accuracies": 0.75, "rewards/chosen": 0.67578125, "rewards/margins": 1.49609375, "rewards/rejected": -0.8203125, "step": 19 }, { "epoch": 0.05997001499250375, "grad_norm": 81.5, "learning_rate": 9.996270517156431e-06, "logits/chosen": -1.70703125, "logits/rejected": -1.681640625, "logps/chosen": -1257.0, "logps/rejected": -1410.0, "loss": 1.08984375, "nll_loss": 1.80859375, "rewards/accuracies": 0.875, "rewards/chosen": 0.947265625, "rewards/margins": 2.205078125, "rewards/rejected": -1.2578125, "step": 20 }, { "epoch": 0.06296851574212893, "grad_norm": 68.5, "learning_rate": 9.994923987375029e-06, "logits/chosen": -1.673828125, "logits/rejected": -1.6796875, "logps/chosen": -1484.0, "logps/rejected": -1427.0, "loss": 1.017578125, "nll_loss": 2.109375, "rewards/accuracies": 0.875, "rewards/chosen": 1.3046875, "rewards/margins": 3.8828125, "rewards/rejected": -2.578125, "step": 21 }, { "epoch": 0.06596701649175413, "grad_norm": 84.0, "learning_rate": 9.993370449424153e-06, "logits/chosen": -1.69140625, "logits/rejected": -1.666015625, "logps/chosen": -1314.0, "logps/rejected": -1415.0, "loss": 1.201171875, "nll_loss": 1.6083984375, "rewards/accuracies": 0.875, "rewards/chosen": 0.44921875, "rewards/margins": 1.31640625, "rewards/rejected": -0.8671875, "step": 22 }, { "epoch": 0.06896551724137931, "grad_norm": 106.0, "learning_rate": 9.991609967688177e-06, "logits/chosen": -1.70703125, "logits/rejected": -1.712890625, "logps/chosen": -1092.625, "logps/rejected": -1225.0, "loss": 1.103515625, "nll_loss": 1.69384765625, "rewards/accuracies": 1.0, "rewards/chosen": 1.525390625, "rewards/margins": 2.474609375, "rewards/rejected": -0.94921875, "step": 23 }, { "epoch": 0.07196401799100449, "grad_norm": 72.5, "learning_rate": 9.98964261512799e-06, "logits/chosen": -1.68359375, "logits/rejected": -1.666015625, "logps/chosen": -1200.0, "logps/rejected": -1340.0, "loss": 1.1376953125, "nll_loss": 1.677734375, "rewards/accuracies": 0.75, "rewards/chosen": 0.91015625, "rewards/margins": 1.685546875, "rewards/rejected": -0.775390625, "step": 24 }, { "epoch": 0.07496251874062969, "grad_norm": 87.0, "learning_rate": 9.987468473277975e-06, "logits/chosen": -1.697265625, "logits/rejected": -1.7109375, "logps/chosen": -2006.0, "logps/rejected": -2126.0, "loss": 1.259765625, "nll_loss": 2.427734375, "rewards/accuracies": 0.75, "rewards/chosen": 2.328125, "rewards/margins": 2.7060546875, "rewards/rejected": -0.3779296875, "step": 25 }, { "epoch": 0.07796101949025487, "grad_norm": 77.0, "learning_rate": 9.985087632242634e-06, "logits/chosen": -1.69140625, "logits/rejected": -1.68359375, "logps/chosen": -949.5, "logps/rejected": -1022.5, "loss": 1.2421875, "nll_loss": 1.724609375, "rewards/accuracies": 0.875, "rewards/chosen": 0.53125, "rewards/margins": 1.041015625, "rewards/rejected": -0.509765625, "step": 26 }, { "epoch": 0.08095952023988005, "grad_norm": 85.5, "learning_rate": 9.982500190692846e-06, "logits/chosen": -1.732421875, "logits/rejected": -1.7265625, "logps/chosen": -2014.0, "logps/rejected": -2064.0, "loss": 1.318359375, "nll_loss": 2.41796875, "rewards/accuracies": 1.0, "rewards/chosen": 2.53125, "rewards/margins": 2.265625, "rewards/rejected": 0.265625, "step": 27 }, { "epoch": 0.08395802098950525, "grad_norm": 65.5, "learning_rate": 9.97970625586178e-06, "logits/chosen": -1.6796875, "logits/rejected": -1.64453125, "logps/chosen": -1634.5, "logps/rejected": -1626.5, "loss": 1.19140625, "nll_loss": 1.9072265625, "rewards/accuracies": 0.875, "rewards/chosen": 1.0390625, "rewards/margins": 2.0390625, "rewards/rejected": -1.0, "step": 28 }, { "epoch": 0.08695652173913043, "grad_norm": 99.5, "learning_rate": 9.976705943540458e-06, "logits/chosen": -1.5234375, "logits/rejected": -1.54296875, "logps/chosen": -861.5, "logps/rejected": -1116.0, "loss": 0.9560546875, "nll_loss": 1.390625, "rewards/accuracies": 0.875, "rewards/chosen": 1.953125, "rewards/margins": 4.0, "rewards/rejected": -2.046875, "step": 29 }, { "epoch": 0.08995502248875563, "grad_norm": 146.0, "learning_rate": 9.973499378072947e-06, "logits/chosen": -1.794921875, "logits/rejected": -1.72265625, "logps/chosen": -1960.0, "logps/rejected": -1952.0, "loss": 1.140625, "nll_loss": 2.068359375, "rewards/accuracies": 0.875, "rewards/chosen": 0.359375, "rewards/margins": 2.90625, "rewards/rejected": -2.546875, "step": 30 }, { "epoch": 0.09295352323838081, "grad_norm": 78.5, "learning_rate": 9.970086692351204e-06, "logits/chosen": -1.755859375, "logits/rejected": -1.7109375, "logps/chosen": -1414.0, "logps/rejected": -1594.0, "loss": 1.0595703125, "nll_loss": 1.90625, "rewards/accuracies": 0.75, "rewards/chosen": 1.31640625, "rewards/margins": 3.203125, "rewards/rejected": -1.8828125, "step": 31 }, { "epoch": 0.095952023988006, "grad_norm": 88.5, "learning_rate": 9.966468027809582e-06, "logits/chosen": -1.662109375, "logits/rejected": -1.658203125, "logps/chosen": -1564.0, "logps/rejected": -1528.0, "loss": 0.974609375, "nll_loss": 1.8515625, "rewards/accuracies": 1.0, "rewards/chosen": 1.078125, "rewards/margins": 3.421875, "rewards/rejected": -2.34375, "step": 32 }, { "epoch": 0.09895052473763119, "grad_norm": 66.0, "learning_rate": 9.962643534418954e-06, "logits/chosen": -1.68359375, "logits/rejected": -1.560546875, "logps/chosen": -1242.25, "logps/rejected": -1332.125, "loss": 1.10546875, "nll_loss": 1.994140625, "rewards/accuracies": 1.0, "rewards/chosen": 2.53515625, "rewards/margins": 2.921875, "rewards/rejected": -0.384765625, "step": 33 }, { "epoch": 0.10194902548725637, "grad_norm": 94.5, "learning_rate": 9.958613370680507e-06, "logits/chosen": -1.78125, "logits/rejected": -1.740234375, "logps/chosen": -1670.0, "logps/rejected": -1626.0, "loss": 1.232421875, "nll_loss": 2.09765625, "rewards/accuracies": 1.0, "rewards/chosen": 0.86328125, "rewards/margins": 1.953125, "rewards/rejected": -1.08984375, "step": 34 }, { "epoch": 0.10494752623688156, "grad_norm": 78.0, "learning_rate": 9.954377703619171e-06, "logits/chosen": -1.6953125, "logits/rejected": -1.689453125, "logps/chosen": -1042.0, "logps/rejected": -1060.5, "loss": 1.1181640625, "nll_loss": 1.666015625, "rewards/accuracies": 1.0, "rewards/chosen": 3.6796875, "rewards/margins": 2.91015625, "rewards/rejected": 0.76953125, "step": 35 }, { "epoch": 0.10794602698650675, "grad_norm": 94.0, "learning_rate": 9.949936708776692e-06, "logits/chosen": -1.67578125, "logits/rejected": -1.65625, "logps/chosen": -952.5, "logps/rejected": -1075.0, "loss": 1.0927734375, "nll_loss": 1.6328125, "rewards/accuracies": 1.0, "rewards/chosen": 2.5859375, "rewards/margins": 5.521484375, "rewards/rejected": -2.943359375, "step": 36 }, { "epoch": 0.11094452773613193, "grad_norm": 72.0, "learning_rate": 9.945290570204361e-06, "logits/chosen": -1.736328125, "logits/rejected": -1.720703125, "logps/chosen": -792.0, "logps/rejected": -972.0, "loss": 1.0947265625, "nll_loss": 1.509765625, "rewards/accuracies": 0.875, "rewards/chosen": 2.33203125, "rewards/margins": 2.1484375, "rewards/rejected": 0.18359375, "step": 37 }, { "epoch": 0.11394302848575712, "grad_norm": 92.5, "learning_rate": 9.940439480455386e-06, "logits/chosen": -1.69140625, "logits/rejected": -1.677734375, "logps/chosen": -1531.0, "logps/rejected": -1532.0, "loss": 0.98828125, "nll_loss": 1.890625, "rewards/accuracies": 1.0, "rewards/chosen": 2.40234375, "rewards/margins": 3.4296875, "rewards/rejected": -1.02734375, "step": 38 }, { "epoch": 0.11694152923538231, "grad_norm": 66.0, "learning_rate": 9.935383640576915e-06, "logits/chosen": -1.732421875, "logits/rejected": -1.671875, "logps/chosen": -1572.0, "logps/rejected": -1528.0, "loss": 1.048828125, "nll_loss": 1.96484375, "rewards/accuracies": 0.875, "rewards/chosen": 1.85546875, "rewards/margins": 3.95703125, "rewards/rejected": -2.1015625, "step": 39 }, { "epoch": 0.1199400299850075, "grad_norm": 80.5, "learning_rate": 9.930123260101697e-06, "logits/chosen": -1.861328125, "logits/rejected": -1.802734375, "logps/chosen": -1168.0, "logps/rejected": -1392.0, "loss": 1.087890625, "nll_loss": 1.6796875, "rewards/accuracies": 1.0, "rewards/chosen": 0.8984375, "rewards/margins": 2.65625, "rewards/rejected": -1.7578125, "step": 40 }, { "epoch": 0.12293853073463268, "grad_norm": 77.5, "learning_rate": 9.9246585570394e-06, "logits/chosen": -1.73828125, "logits/rejected": -1.552734375, "logps/chosen": -1316.25, "logps/rejected": -1588.0, "loss": 0.89453125, "nll_loss": 1.591796875, "rewards/accuracies": 1.0, "rewards/chosen": 1.75, "rewards/margins": 3.625, "rewards/rejected": -1.875, "step": 41 }, { "epoch": 0.12593703148425786, "grad_norm": 143.0, "learning_rate": 9.918989757867584e-06, "logits/chosen": -1.7890625, "logits/rejected": -1.7578125, "logps/chosen": -2000.0, "logps/rejected": -1968.0, "loss": 1.24609375, "nll_loss": 2.30078125, "rewards/accuracies": 1.0, "rewards/chosen": -0.078125, "rewards/margins": 3.109375, "rewards/rejected": -3.1875, "step": 42 }, { "epoch": 0.12893553223388307, "grad_norm": 61.25, "learning_rate": 9.9131170975223e-06, "logits/chosen": -1.7265625, "logits/rejected": -1.720703125, "logps/chosen": -1172.0, "logps/rejected": -1382.0, "loss": 1.087890625, "nll_loss": 1.943359375, "rewards/accuracies": 0.875, "rewards/chosen": 1.83984375, "rewards/margins": 3.05859375, "rewards/rejected": -1.21875, "step": 43 }, { "epoch": 0.13193403298350825, "grad_norm": 71.0, "learning_rate": 9.907040819388372e-06, "logits/chosen": -1.794921875, "logits/rejected": -1.685546875, "logps/chosen": -946.0, "logps/rejected": -863.25, "loss": 0.943359375, "nll_loss": 1.4990234375, "rewards/accuracies": 1.0, "rewards/chosen": 1.22265625, "rewards/margins": 2.45703125, "rewards/rejected": -1.2353515625, "step": 44 }, { "epoch": 0.13493253373313344, "grad_norm": 62.25, "learning_rate": 9.90076117528929e-06, "logits/chosen": -1.64453125, "logits/rejected": -1.603515625, "logps/chosen": -743.25, "logps/rejected": -811.625, "loss": 0.95703125, "nll_loss": 1.330078125, "rewards/accuracies": 1.0, "rewards/chosen": 1.2421875, "rewards/margins": 2.087890625, "rewards/rejected": -0.84375, "step": 45 }, { "epoch": 0.13793103448275862, "grad_norm": 62.0, "learning_rate": 9.89427842547679e-06, "logits/chosen": -1.59375, "logits/rejected": -1.65234375, "logps/chosen": -1245.5, "logps/rejected": -1451.0, "loss": 1.017578125, "nll_loss": 1.7666015625, "rewards/accuracies": 0.875, "rewards/chosen": 1.2578125, "rewards/margins": 3.19921875, "rewards/rejected": -1.94140625, "step": 46 }, { "epoch": 0.1409295352323838, "grad_norm": 92.0, "learning_rate": 9.88759283862006e-06, "logits/chosen": -1.80078125, "logits/rejected": -1.7578125, "logps/chosen": -1332.0, "logps/rejected": -1294.0, "loss": 1.0537109375, "nll_loss": 2.0556640625, "rewards/accuracies": 0.875, "rewards/chosen": 1.17578125, "rewards/margins": 3.3359375, "rewards/rejected": -2.16015625, "step": 47 }, { "epoch": 0.14392803598200898, "grad_norm": 70.0, "learning_rate": 9.880704691794608e-06, "logits/chosen": -1.5703125, "logits/rejected": -1.541015625, "logps/chosen": -1231.25, "logps/rejected": -1248.5, "loss": 1.08203125, "nll_loss": 1.873046875, "rewards/accuracies": 0.875, "rewards/chosen": 0.82421875, "rewards/margins": 2.75390625, "rewards/rejected": -1.9296875, "step": 48 }, { "epoch": 0.1469265367316342, "grad_norm": 88.5, "learning_rate": 9.873614270470778e-06, "logits/chosen": -1.755859375, "logits/rejected": -1.75, "logps/chosen": -2344.0, "logps/rejected": -2312.0, "loss": 1.1123046875, "nll_loss": 2.46484375, "rewards/accuracies": 1.0, "rewards/chosen": 2.375, "rewards/margins": 4.375, "rewards/rejected": -2.0, "step": 49 }, { "epoch": 0.14992503748125938, "grad_norm": 45.75, "learning_rate": 9.866321868501914e-06, "logits/chosen": -1.689453125, "logits/rejected": -1.673828125, "logps/chosen": -1115.75, "logps/rejected": -1157.375, "loss": 0.8994140625, "nll_loss": 1.236328125, "rewards/accuracies": 1.0, "rewards/chosen": 1.888671875, "rewards/margins": 2.609375, "rewards/rejected": -0.7197265625, "step": 50 }, { "epoch": 0.15292353823088456, "grad_norm": 64.5, "learning_rate": 9.858827788112195e-06, "logits/chosen": -1.685546875, "logits/rejected": -1.646484375, "logps/chosen": -1329.0, "logps/rejected": -1496.0, "loss": 1.095703125, "nll_loss": 1.66015625, "rewards/accuracies": 0.875, "rewards/chosen": 1.96875, "rewards/margins": 6.296875, "rewards/rejected": -4.328125, "step": 51 }, { "epoch": 0.15592203898050974, "grad_norm": 55.25, "learning_rate": 9.851132339884097e-06, "logits/chosen": -1.642578125, "logits/rejected": -1.59765625, "logps/chosen": -847.21875, "logps/rejected": -881.5, "loss": 0.8349609375, "nll_loss": 1.100830078125, "rewards/accuracies": 0.875, "rewards/chosen": 2.23828125, "rewards/margins": 3.14453125, "rewards/rejected": -0.90234375, "step": 52 }, { "epoch": 0.15892053973013492, "grad_norm": 57.5, "learning_rate": 9.843235842745527e-06, "logits/chosen": -1.662109375, "logits/rejected": -1.6640625, "logps/chosen": -1027.3125, "logps/rejected": -1074.5625, "loss": 0.8544921875, "nll_loss": 1.43701171875, "rewards/accuracies": 0.875, "rewards/chosen": 2.14453125, "rewards/margins": 3.2890625, "rewards/rejected": -1.146484375, "step": 53 }, { "epoch": 0.1619190404797601, "grad_norm": 53.0, "learning_rate": 9.835138623956603e-06, "logits/chosen": -1.685546875, "logits/rejected": -1.62890625, "logps/chosen": -1329.0, "logps/rejected": -1319.5, "loss": 0.94921875, "nll_loss": 1.47802734375, "rewards/accuracies": 0.875, "rewards/chosen": 2.25, "rewards/margins": 2.953125, "rewards/rejected": -0.703125, "step": 54 }, { "epoch": 0.16491754122938532, "grad_norm": 68.0, "learning_rate": 9.826841019096095e-06, "logits/chosen": -1.818359375, "logits/rejected": -1.7421875, "logps/chosen": -1718.0, "logps/rejected": -1801.0, "loss": 0.8681640625, "nll_loss": 2.0546875, "rewards/accuracies": 1.0, "rewards/chosen": 2.57421875, "rewards/margins": 4.7734375, "rewards/rejected": -2.203125, "step": 55 }, { "epoch": 0.1679160419790105, "grad_norm": 73.5, "learning_rate": 9.818343372047509e-06, "logits/chosen": -1.591796875, "logits/rejected": -1.6640625, "logps/chosen": -1427.5, "logps/rejected": -1725.0, "loss": 0.8447265625, "nll_loss": 1.916015625, "rewards/accuracies": 1.0, "rewards/chosen": 2.6171875, "rewards/margins": 4.44921875, "rewards/rejected": -1.828125, "step": 56 }, { "epoch": 0.17091454272863568, "grad_norm": 57.75, "learning_rate": 9.80964603498485e-06, "logits/chosen": -1.787109375, "logits/rejected": -1.751953125, "logps/chosen": -1070.25, "logps/rejected": -1346.0, "loss": 0.9833984375, "nll_loss": 1.65771484375, "rewards/accuracies": 0.875, "rewards/chosen": 0.73046875, "rewards/margins": 3.03515625, "rewards/rejected": -2.30859375, "step": 57 }, { "epoch": 0.17391304347826086, "grad_norm": 86.5, "learning_rate": 9.80074936835801e-06, "logits/chosen": -1.74609375, "logits/rejected": -1.7421875, "logps/chosen": -1275.0, "logps/rejected": -1245.0, "loss": 0.9365234375, "nll_loss": 1.6171875, "rewards/accuracies": 1.0, "rewards/chosen": 1.09375, "rewards/margins": 3.78125, "rewards/rejected": -2.6875, "step": 58 }, { "epoch": 0.17691154422788605, "grad_norm": 54.25, "learning_rate": 9.79165374087784e-06, "logits/chosen": -1.66015625, "logits/rejected": -1.6328125, "logps/chosen": -723.875, "logps/rejected": -758.0, "loss": 0.8095703125, "nll_loss": 1.4111328125, "rewards/accuracies": 1.0, "rewards/chosen": 1.2109375, "rewards/margins": 3.40625, "rewards/rejected": -2.19921875, "step": 59 }, { "epoch": 0.17991004497751126, "grad_norm": 91.0, "learning_rate": 9.782359529500867e-06, "logits/chosen": -1.650390625, "logits/rejected": -1.658203125, "logps/chosen": -1601.5, "logps/rejected": -1704.0, "loss": 0.78515625, "nll_loss": 1.71728515625, "rewards/accuracies": 1.0, "rewards/chosen": 1.41015625, "rewards/margins": 6.46875, "rewards/rejected": -5.0625, "step": 60 }, { "epoch": 0.18290854572713644, "grad_norm": 95.0, "learning_rate": 9.772867119413667e-06, "logits/chosen": -1.57421875, "logits/rejected": -1.724609375, "logps/chosen": -1421.0, "logps/rejected": -1319.0, "loss": 0.9287109375, "nll_loss": 2.1708984375, "rewards/accuracies": 1.0, "rewards/chosen": 1.3203125, "rewards/margins": 5.75390625, "rewards/rejected": -4.4375, "step": 61 }, { "epoch": 0.18590704647676162, "grad_norm": 46.0, "learning_rate": 9.763176904016914e-06, "logits/chosen": -1.669921875, "logits/rejected": -1.634765625, "logps/chosen": -1163.0, "logps/rejected": -1400.0, "loss": 0.7841796875, "nll_loss": 1.623046875, "rewards/accuracies": 1.0, "rewards/chosen": 2.6875, "rewards/margins": 5.9296875, "rewards/rejected": -3.2421875, "step": 62 }, { "epoch": 0.1889055472263868, "grad_norm": 64.5, "learning_rate": 9.753289284909058e-06, "logits/chosen": -1.802734375, "logits/rejected": -1.712890625, "logps/chosen": -1660.0, "logps/rejected": -1758.0, "loss": 0.9921875, "nll_loss": 2.16015625, "rewards/accuracies": 1.0, "rewards/chosen": 2.4375, "rewards/margins": 4.7265625, "rewards/rejected": -2.2890625, "step": 63 }, { "epoch": 0.191904047976012, "grad_norm": 36.25, "learning_rate": 9.743204671869694e-06, "logits/chosen": -1.728515625, "logits/rejected": -1.587890625, "logps/chosen": -1348.0, "logps/rejected": -1258.0, "loss": 0.8037109375, "nll_loss": 1.7705078125, "rewards/accuracies": 1.0, "rewards/chosen": 2.24609375, "rewards/margins": 5.19140625, "rewards/rejected": -2.9453125, "step": 64 }, { "epoch": 0.19490254872563717, "grad_norm": 61.75, "learning_rate": 9.73292348284258e-06, "logits/chosen": -1.61328125, "logits/rejected": -1.63671875, "logps/chosen": -1367.875, "logps/rejected": -1122.0, "loss": 0.927734375, "nll_loss": 1.49462890625, "rewards/accuracies": 1.0, "rewards/chosen": 2.412109375, "rewards/margins": 3.296875, "rewards/rejected": -0.8828125, "step": 65 }, { "epoch": 0.19790104947526238, "grad_norm": 79.0, "learning_rate": 9.722446143918307e-06, "logits/chosen": -1.6171875, "logits/rejected": -1.58984375, "logps/chosen": -1438.0, "logps/rejected": -1646.0, "loss": 0.767578125, "nll_loss": 1.986328125, "rewards/accuracies": 1.0, "rewards/chosen": 2.34765625, "rewards/margins": 5.8046875, "rewards/rejected": -3.4609375, "step": 66 }, { "epoch": 0.20089955022488756, "grad_norm": 120.5, "learning_rate": 9.711773089316645e-06, "logits/chosen": -1.6328125, "logits/rejected": -1.72265625, "logps/chosen": -1596.0, "logps/rejected": -1788.0, "loss": 0.822265625, "nll_loss": 2.0546875, "rewards/accuracies": 1.0, "rewards/chosen": 4.5, "rewards/margins": 6.453125, "rewards/rejected": -1.953125, "step": 67 }, { "epoch": 0.20389805097451275, "grad_norm": 61.5, "learning_rate": 9.70090476136855e-06, "logits/chosen": -1.685546875, "logits/rejected": -1.685546875, "logps/chosen": -1428.0, "logps/rejected": -1726.0, "loss": 0.7880859375, "nll_loss": 2.0, "rewards/accuracies": 1.0, "rewards/chosen": 3.78125, "rewards/margins": 6.40625, "rewards/rejected": -2.625, "step": 68 }, { "epoch": 0.20689655172413793, "grad_norm": 77.0, "learning_rate": 9.689841610497828e-06, "logits/chosen": -1.62109375, "logits/rejected": -1.693359375, "logps/chosen": -1035.0, "logps/rejected": -1347.0, "loss": 0.8095703125, "nll_loss": 1.748046875, "rewards/accuracies": 1.0, "rewards/chosen": 3.4140625, "rewards/margins": 5.28125, "rewards/rejected": -1.8671875, "step": 69 }, { "epoch": 0.2098950524737631, "grad_norm": 41.0, "learning_rate": 9.678584095202468e-06, "logits/chosen": -1.78515625, "logits/rejected": -1.697265625, "logps/chosen": -1078.25, "logps/rejected": -1206.5, "loss": 0.8173828125, "nll_loss": 1.513671875, "rewards/accuracies": 1.0, "rewards/chosen": 2.53125, "rewards/margins": 5.34375, "rewards/rejected": -2.8125, "step": 70 }, { "epoch": 0.2128935532233883, "grad_norm": 63.25, "learning_rate": 9.667132682035646e-06, "logits/chosen": -1.732421875, "logits/rejected": -1.712890625, "logps/chosen": -1301.5625, "logps/rejected": -1591.125, "loss": 0.5625, "nll_loss": 1.4111328125, "rewards/accuracies": 1.0, "rewards/chosen": 2.837890625, "rewards/margins": 7.5, "rewards/rejected": -4.6640625, "step": 71 }, { "epoch": 0.2158920539730135, "grad_norm": 73.5, "learning_rate": 9.655487845586378e-06, "logits/chosen": -1.71484375, "logits/rejected": -1.70703125, "logps/chosen": -1466.0, "logps/rejected": -1548.75, "loss": 0.751953125, "nll_loss": 1.802734375, "rewards/accuracies": 1.0, "rewards/chosen": 3.09765625, "rewards/margins": 5.875, "rewards/rejected": -2.78125, "step": 72 }, { "epoch": 0.21889055472263869, "grad_norm": 62.75, "learning_rate": 9.643650068459863e-06, "logits/chosen": -1.765625, "logits/rejected": -1.740234375, "logps/chosen": -1574.0, "logps/rejected": -1474.0, "loss": 0.9892578125, "nll_loss": 2.103515625, "rewards/accuracies": 1.0, "rewards/chosen": 1.703125, "rewards/margins": 3.703125, "rewards/rejected": -2.0, "step": 73 }, { "epoch": 0.22188905547226387, "grad_norm": 64.5, "learning_rate": 9.631619841257477e-06, "logits/chosen": -1.755859375, "logits/rejected": -1.8046875, "logps/chosen": -1734.0, "logps/rejected": -1826.0, "loss": 0.7763671875, "nll_loss": 2.20703125, "rewards/accuracies": 1.0, "rewards/chosen": 3.5390625, "rewards/margins": 7.9453125, "rewards/rejected": -4.40625, "step": 74 }, { "epoch": 0.22488755622188905, "grad_norm": 48.0, "learning_rate": 9.619397662556434e-06, "logits/chosen": -1.78515625, "logits/rejected": -1.646484375, "logps/chosen": -757.34375, "logps/rejected": -755.5, "loss": 0.8515625, "nll_loss": 1.28759765625, "rewards/accuracies": 1.0, "rewards/chosen": 1.5859375, "rewards/margins": 3.9296875, "rewards/rejected": -2.34375, "step": 75 }, { "epoch": 0.22788605697151423, "grad_norm": 68.5, "learning_rate": 9.606984038889142e-06, "logits/chosen": -1.654296875, "logits/rejected": -1.615234375, "logps/chosen": -1081.0, "logps/rejected": -1185.0, "loss": 0.9453125, "nll_loss": 1.45703125, "rewards/accuracies": 1.0, "rewards/chosen": 1.015625, "rewards/margins": 3.859375, "rewards/rejected": -2.84375, "step": 76 }, { "epoch": 0.23088455772113944, "grad_norm": 33.25, "learning_rate": 9.594379484722185e-06, "logits/chosen": -1.607421875, "logits/rejected": -1.609375, "logps/chosen": -888.75, "logps/rejected": -1034.0, "loss": 0.587890625, "nll_loss": 1.18896484375, "rewards/accuracies": 1.0, "rewards/chosen": 2.0546875, "rewards/margins": 8.9609375, "rewards/rejected": -6.890625, "step": 77 }, { "epoch": 0.23388305847076463, "grad_norm": 61.0, "learning_rate": 9.581584522435025e-06, "logits/chosen": -1.751953125, "logits/rejected": -1.740234375, "logps/chosen": -2404.0, "logps/rejected": -2376.0, "loss": 0.8505859375, "nll_loss": 2.4453125, "rewards/accuracies": 1.0, "rewards/chosen": 3.5, "rewards/margins": 7.25, "rewards/rejected": -3.75, "step": 78 }, { "epoch": 0.2368815592203898, "grad_norm": 82.0, "learning_rate": 9.568599682298337e-06, "logits/chosen": -1.57421875, "logits/rejected": -1.564453125, "logps/chosen": -1025.5, "logps/rejected": -1050.0, "loss": 0.935546875, "nll_loss": 1.896484375, "rewards/accuracies": 1.0, "rewards/chosen": 2.7265625, "rewards/margins": 4.375, "rewards/rejected": -1.64453125, "step": 79 }, { "epoch": 0.239880059970015, "grad_norm": 47.25, "learning_rate": 9.555425502452038e-06, "logits/chosen": -1.744140625, "logits/rejected": -1.724609375, "logps/chosen": -1289.0, "logps/rejected": -1327.0, "loss": 0.845703125, "nll_loss": 1.751953125, "rewards/accuracies": 1.0, "rewards/chosen": 2.05078125, "rewards/margins": 5.5, "rewards/rejected": -3.453125, "step": 80 }, { "epoch": 0.24287856071964017, "grad_norm": 57.0, "learning_rate": 9.542062528882989e-06, "logits/chosen": -1.619140625, "logits/rejected": -1.607421875, "logps/chosen": -795.0625, "logps/rejected": -899.0, "loss": 0.7236328125, "nll_loss": 1.14697265625, "rewards/accuracies": 0.875, "rewards/chosen": 1.2978515625, "rewards/margins": 4.11328125, "rewards/rejected": -2.8125, "step": 81 }, { "epoch": 0.24587706146926536, "grad_norm": 63.5, "learning_rate": 9.528511315402358e-06, "logits/chosen": -1.732421875, "logits/rejected": -1.791015625, "logps/chosen": -1182.25, "logps/rejected": -1469.75, "loss": 0.685546875, "nll_loss": 1.826171875, "rewards/accuracies": 1.0, "rewards/chosen": 3.671875, "rewards/margins": 7.0078125, "rewards/rejected": -3.33203125, "step": 82 }, { "epoch": 0.24887556221889057, "grad_norm": 71.5, "learning_rate": 9.514772423622675e-06, "logits/chosen": -1.73828125, "logits/rejected": -1.740234375, "logps/chosen": -1593.0, "logps/rejected": -1725.0, "loss": 0.8203125, "nll_loss": 2.0, "rewards/accuracies": 1.0, "rewards/chosen": 3.953125, "rewards/margins": 7.2421875, "rewards/rejected": -3.2890625, "step": 83 }, { "epoch": 0.2518740629685157, "grad_norm": 44.5, "learning_rate": 9.500846422934557e-06, "logits/chosen": -1.7265625, "logits/rejected": -1.6953125, "logps/chosen": -1706.0, "logps/rejected": -1723.0, "loss": 0.724609375, "nll_loss": 2.033203125, "rewards/accuracies": 1.0, "rewards/chosen": 2.583984375, "rewards/margins": 6.1171875, "rewards/rejected": -3.53515625, "step": 84 }, { "epoch": 0.25487256371814093, "grad_norm": 55.0, "learning_rate": 9.4867338904831e-06, "logits/chosen": -1.66796875, "logits/rejected": -1.630859375, "logps/chosen": -954.0, "logps/rejected": -1090.0, "loss": 0.7001953125, "nll_loss": 1.50390625, "rewards/accuracies": 1.0, "rewards/chosen": 1.728515625, "rewards/margins": 5.0625, "rewards/rejected": -3.33203125, "step": 85 }, { "epoch": 0.25787106446776614, "grad_norm": 94.0, "learning_rate": 9.472435411143979e-06, "logits/chosen": -1.771484375, "logits/rejected": -1.798828125, "logps/chosen": -1596.0, "logps/rejected": -1594.0, "loss": 0.89453125, "nll_loss": 2.115234375, "rewards/accuracies": 1.0, "rewards/chosen": 1.2578125, "rewards/margins": 5.765625, "rewards/rejected": -4.5078125, "step": 86 }, { "epoch": 0.2608695652173913, "grad_norm": 46.75, "learning_rate": 9.457951577499187e-06, "logits/chosen": -1.783203125, "logits/rejected": -1.744140625, "logps/chosen": -1765.0, "logps/rejected": -2003.0, "loss": 0.6748046875, "nll_loss": 2.07421875, "rewards/accuracies": 1.0, "rewards/chosen": 3.234375, "rewards/margins": 7.90625, "rewards/rejected": -4.671875, "step": 87 }, { "epoch": 0.2638680659670165, "grad_norm": 40.5, "learning_rate": 9.443282989812494e-06, "logits/chosen": -1.748046875, "logits/rejected": -1.7578125, "logps/chosen": -1503.4375, "logps/rejected": -1483.0, "loss": 0.67919921875, "nll_loss": 2.0185546875, "rewards/accuracies": 1.0, "rewards/chosen": 3.05078125, "rewards/margins": 7.390625, "rewards/rejected": -4.33984375, "step": 88 }, { "epoch": 0.26686656671664166, "grad_norm": 105.5, "learning_rate": 9.428430256004558e-06, "logits/chosen": -1.63671875, "logits/rejected": -1.703125, "logps/chosen": -1257.0, "logps/rejected": -1324.0, "loss": 0.84716796875, "nll_loss": 1.6064453125, "rewards/accuracies": 1.0, "rewards/chosen": 3.99609375, "rewards/margins": 8.45703125, "rewards/rejected": -4.4453125, "step": 89 }, { "epoch": 0.2698650674662669, "grad_norm": 60.0, "learning_rate": 9.413393991627737e-06, "logits/chosen": -1.552734375, "logits/rejected": -1.513671875, "logps/chosen": -624.09375, "logps/rejected": -702.125, "loss": 0.67236328125, "nll_loss": 1.203125, "rewards/accuracies": 1.0, "rewards/chosen": 2.826171875, "rewards/margins": 4.8828125, "rewards/rejected": -2.0625, "step": 90 }, { "epoch": 0.272863568215892, "grad_norm": 83.5, "learning_rate": 9.398174819840577e-06, "logits/chosen": -1.63671875, "logits/rejected": -1.650390625, "logps/chosen": -1771.0, "logps/rejected": -1917.0, "loss": 0.732421875, "nll_loss": 2.076171875, "rewards/accuracies": 1.0, "rewards/chosen": 4.205078125, "rewards/margins": 7.234375, "rewards/rejected": -3.03125, "step": 91 }, { "epoch": 0.27586206896551724, "grad_norm": 92.0, "learning_rate": 9.382773371381986e-06, "logits/chosen": -1.759765625, "logits/rejected": -1.77734375, "logps/chosen": -1620.0, "logps/rejected": -1728.0, "loss": 0.8486328125, "nll_loss": 2.0859375, "rewards/accuracies": 1.0, "rewards/chosen": 4.640625, "rewards/margins": 6.5625, "rewards/rejected": -1.921875, "step": 92 }, { "epoch": 0.27886056971514245, "grad_norm": 60.5, "learning_rate": 9.367190284545087e-06, "logits/chosen": -1.6484375, "logits/rejected": -1.65625, "logps/chosen": -1077.5, "logps/rejected": -1267.0, "loss": 0.634765625, "nll_loss": 1.662109375, "rewards/accuracies": 1.0, "rewards/chosen": 4.134765625, "rewards/margins": 7.2734375, "rewards/rejected": -3.140625, "step": 93 }, { "epoch": 0.2818590704647676, "grad_norm": 70.5, "learning_rate": 9.351426205150778e-06, "logits/chosen": -1.708984375, "logits/rejected": -1.744140625, "logps/chosen": -1548.0, "logps/rejected": -1706.0, "loss": 0.755859375, "nll_loss": 1.7734375, "rewards/accuracies": 1.0, "rewards/chosen": 2.359375, "rewards/margins": 5.890625, "rewards/rejected": -3.53125, "step": 94 }, { "epoch": 0.2848575712143928, "grad_norm": 32.25, "learning_rate": 9.335481786520955e-06, "logits/chosen": -1.66796875, "logits/rejected": -1.595703125, "logps/chosen": -977.625, "logps/rejected": -1124.0, "loss": 0.6640625, "nll_loss": 1.5087890625, "rewards/accuracies": 1.0, "rewards/chosen": 2.158203125, "rewards/margins": 5.28515625, "rewards/rejected": -3.12890625, "step": 95 }, { "epoch": 0.28785607196401797, "grad_norm": 45.25, "learning_rate": 9.319357689451444e-06, "logits/chosen": -1.845703125, "logits/rejected": -1.802734375, "logps/chosen": -1418.0, "logps/rejected": -1503.0, "loss": 0.7265625, "nll_loss": 1.974609375, "rewards/accuracies": 1.0, "rewards/chosen": 3.48046875, "rewards/margins": 7.765625, "rewards/rejected": -4.28125, "step": 96 }, { "epoch": 0.2908545727136432, "grad_norm": 24.625, "learning_rate": 9.30305458218461e-06, "logits/chosen": -1.650390625, "logits/rejected": -1.640625, "logps/chosen": -1281.125, "logps/rejected": -1367.25, "loss": 0.52880859375, "nll_loss": 1.446044921875, "rewards/accuracies": 1.0, "rewards/chosen": 3.453125, "rewards/margins": 11.46875, "rewards/rejected": -8.03125, "step": 97 }, { "epoch": 0.2938530734632684, "grad_norm": 57.25, "learning_rate": 9.286573140381663e-06, "logits/chosen": -1.65234375, "logits/rejected": -1.6875, "logps/chosen": -1049.5, "logps/rejected": -1349.5, "loss": 0.79296875, "nll_loss": 1.53515625, "rewards/accuracies": 1.0, "rewards/chosen": 1.234375, "rewards/margins": 4.796875, "rewards/rejected": -3.5625, "step": 98 }, { "epoch": 0.29685157421289354, "grad_norm": 55.25, "learning_rate": 9.26991404709466e-06, "logits/chosen": -1.732421875, "logits/rejected": -1.708984375, "logps/chosen": -1141.0, "logps/rejected": -1280.0, "loss": 0.771484375, "nll_loss": 1.77734375, "rewards/accuracies": 1.0, "rewards/chosen": 2.1640625, "rewards/margins": 8.7421875, "rewards/rejected": -6.59375, "step": 99 }, { "epoch": 0.29985007496251875, "grad_norm": 49.25, "learning_rate": 9.253077992738193e-06, "logits/chosen": -1.771484375, "logits/rejected": -1.77734375, "logps/chosen": -1269.0, "logps/rejected": -1431.0, "loss": 0.66796875, "nll_loss": 1.712890625, "rewards/accuracies": 1.0, "rewards/chosen": 2.61328125, "rewards/margins": 6.71875, "rewards/rejected": -4.1015625, "step": 100 }, { "epoch": 0.3028485757121439, "grad_norm": 32.75, "learning_rate": 9.236065675060775e-06, "logits/chosen": -1.794921875, "logits/rejected": -1.740234375, "logps/chosen": -1110.5, "logps/rejected": -1320.0, "loss": 0.673828125, "nll_loss": 1.9921875, "rewards/accuracies": 1.0, "rewards/chosen": 3.16796875, "rewards/margins": 10.1953125, "rewards/rejected": -7.02734375, "step": 101 }, { "epoch": 0.3058470764617691, "grad_norm": 90.0, "learning_rate": 9.218877799115929e-06, "logits/chosen": -1.701171875, "logits/rejected": -1.69140625, "logps/chosen": -1415.0, "logps/rejected": -1542.625, "loss": 0.791015625, "nll_loss": 1.62841796875, "rewards/accuracies": 1.0, "rewards/chosen": 1.837890625, "rewards/margins": 7.8046875, "rewards/rejected": -5.96875, "step": 102 }, { "epoch": 0.30884557721139433, "grad_norm": 62.5, "learning_rate": 9.201515077232958e-06, "logits/chosen": -1.67578125, "logits/rejected": -1.67578125, "logps/chosen": -1520.0, "logps/rejected": -1620.0, "loss": 0.5986328125, "nll_loss": 1.732421875, "rewards/accuracies": 1.0, "rewards/chosen": 2.8125, "rewards/margins": 9.28125, "rewards/rejected": -6.46875, "step": 103 }, { "epoch": 0.3118440779610195, "grad_norm": 43.0, "learning_rate": 9.183978228987436e-06, "logits/chosen": -1.650390625, "logits/rejected": -1.642578125, "logps/chosen": -1003.0, "logps/rejected": -1086.0, "loss": 0.646484375, "nll_loss": 1.595703125, "rewards/accuracies": 1.0, "rewards/chosen": 2.23046875, "rewards/margins": 9.859375, "rewards/rejected": -7.6171875, "step": 104 }, { "epoch": 0.3148425787106447, "grad_norm": 15.5, "learning_rate": 9.166267981171369e-06, "logits/chosen": -1.623046875, "logits/rejected": -1.62890625, "logps/chosen": -297.4375, "logps/rejected": -299.25, "loss": 0.4462890625, "nll_loss": 0.8310546875, "rewards/accuracies": 1.0, "rewards/chosen": 1.98046875, "rewards/margins": 5.640625, "rewards/rejected": -3.66015625, "step": 105 }, { "epoch": 0.31784107946026985, "grad_norm": 63.75, "learning_rate": 9.148385067763094e-06, "logits/chosen": -1.693359375, "logits/rejected": -1.708984375, "logps/chosen": -1387.25, "logps/rejected": -1392.5, "loss": 0.6611328125, "nll_loss": 1.66943359375, "rewards/accuracies": 1.0, "rewards/chosen": 3.7109375, "rewards/margins": 6.71875, "rewards/rejected": -3.0078125, "step": 106 }, { "epoch": 0.32083958020989506, "grad_norm": 82.5, "learning_rate": 9.130330229896846e-06, "logits/chosen": -1.8203125, "logits/rejected": -1.8125, "logps/chosen": -1944.0, "logps/rejected": -2220.0, "loss": 0.7900390625, "nll_loss": 2.06640625, "rewards/accuracies": 1.0, "rewards/chosen": 2.390625, "rewards/margins": 7.890625, "rewards/rejected": -5.5, "step": 107 }, { "epoch": 0.3238380809595202, "grad_norm": 36.5, "learning_rate": 9.112104215832047e-06, "logits/chosen": -1.5546875, "logits/rejected": -1.59375, "logps/chosen": -1111.25, "logps/rejected": -1300.5, "loss": 0.7431640625, "nll_loss": 1.3056640625, "rewards/accuracies": 1.0, "rewards/chosen": 2.8125, "rewards/margins": 5.55859375, "rewards/rejected": -2.74609375, "step": 108 }, { "epoch": 0.3268365817091454, "grad_norm": 46.5, "learning_rate": 9.093707780922293e-06, "logits/chosen": -1.66015625, "logits/rejected": -1.662109375, "logps/chosen": -1114.0, "logps/rejected": -1274.0, "loss": 0.630859375, "nll_loss": 1.5625, "rewards/accuracies": 1.0, "rewards/chosen": 3.8515625, "rewards/margins": 10.2421875, "rewards/rejected": -6.375, "step": 109 }, { "epoch": 0.32983508245877063, "grad_norm": 38.75, "learning_rate": 9.075141687584056e-06, "logits/chosen": -1.58984375, "logits/rejected": -1.583984375, "logps/chosen": -279.25, "logps/rejected": -589.0, "loss": 0.583984375, "nll_loss": 0.9462890625, "rewards/accuracies": 1.0, "rewards/chosen": 2.0625, "rewards/margins": 8.515625, "rewards/rejected": -6.46875, "step": 110 }, { "epoch": 0.3328335832083958, "grad_norm": 38.5, "learning_rate": 9.056406705265084e-06, "logits/chosen": -1.609375, "logits/rejected": -1.638671875, "logps/chosen": -1255.0, "logps/rejected": -1392.5, "loss": 0.685546875, "nll_loss": 1.7734375, "rewards/accuracies": 1.0, "rewards/chosen": 2.90625, "rewards/margins": 6.09375, "rewards/rejected": -3.1875, "step": 111 }, { "epoch": 0.335832083958021, "grad_norm": 88.5, "learning_rate": 9.037503610412502e-06, "logits/chosen": -1.69921875, "logits/rejected": -1.7265625, "logps/chosen": -1242.5, "logps/rejected": -1498.75, "loss": 0.86669921875, "nll_loss": 1.60302734375, "rewards/accuracies": 1.0, "rewards/chosen": 2.7421875, "rewards/margins": 4.4609375, "rewards/rejected": -1.71875, "step": 112 }, { "epoch": 0.33883058470764615, "grad_norm": 50.25, "learning_rate": 9.018433186440648e-06, "logits/chosen": -1.755859375, "logits/rejected": -1.7265625, "logps/chosen": -1106.0, "logps/rejected": -1189.0, "loss": 0.609375, "nll_loss": 1.650390625, "rewards/accuracies": 1.0, "rewards/chosen": 3.859375, "rewards/margins": 7.1171875, "rewards/rejected": -3.25390625, "step": 113 }, { "epoch": 0.34182908545727136, "grad_norm": 52.25, "learning_rate": 8.999196223698599e-06, "logits/chosen": -1.775390625, "logits/rejected": -1.724609375, "logps/chosen": -1095.25, "logps/rejected": -1240.0, "loss": 0.818359375, "nll_loss": 1.3671875, "rewards/accuracies": 0.875, "rewards/chosen": 2.66796875, "rewards/margins": 4.54296875, "rewards/rejected": -1.875, "step": 114 }, { "epoch": 0.3448275862068966, "grad_norm": 39.0, "learning_rate": 8.979793519437413e-06, "logits/chosen": -1.705078125, "logits/rejected": -1.693359375, "logps/chosen": -1506.0, "logps/rejected": -1627.0, "loss": 0.671875, "nll_loss": 1.873046875, "rewards/accuracies": 1.0, "rewards/chosen": 3.765625, "rewards/margins": 8.8671875, "rewards/rejected": -5.1015625, "step": 115 }, { "epoch": 0.34782608695652173, "grad_norm": 39.0, "learning_rate": 8.960225877777095e-06, "logits/chosen": -1.650390625, "logits/rejected": -1.591796875, "logps/chosen": -1103.5, "logps/rejected": -1111.0, "loss": 0.62353515625, "nll_loss": 1.578125, "rewards/accuracies": 1.0, "rewards/chosen": 3.515625, "rewards/margins": 7.0234375, "rewards/rejected": -3.5078125, "step": 116 }, { "epoch": 0.35082458770614694, "grad_norm": 15.375, "learning_rate": 8.940494109673266e-06, "logits/chosen": -1.650390625, "logits/rejected": -1.642578125, "logps/chosen": -637.25, "logps/rejected": -920.0, "loss": 0.611328125, "nll_loss": 1.068359375, "rewards/accuracies": 1.0, "rewards/chosen": 2.22265625, "rewards/margins": 6.828125, "rewards/rejected": -4.60546875, "step": 117 }, { "epoch": 0.3538230884557721, "grad_norm": 51.0, "learning_rate": 8.920599032883553e-06, "logits/chosen": -1.76171875, "logits/rejected": -1.798828125, "logps/chosen": -1539.0, "logps/rejected": -1982.0, "loss": 0.673828125, "nll_loss": 1.96484375, "rewards/accuracies": 1.0, "rewards/chosen": 3.140625, "rewards/margins": 10.8203125, "rewards/rejected": -7.6796875, "step": 118 }, { "epoch": 0.3568215892053973, "grad_norm": 31.875, "learning_rate": 8.900541471933703e-06, "logits/chosen": -1.7734375, "logits/rejected": -1.75390625, "logps/chosen": -1640.0, "logps/rejected": -1754.0, "loss": 0.685546875, "nll_loss": 2.126953125, "rewards/accuracies": 1.0, "rewards/chosen": 4.0390625, "rewards/margins": 9.140625, "rewards/rejected": -5.1015625, "step": 119 }, { "epoch": 0.3598200899550225, "grad_norm": 23.875, "learning_rate": 8.880322258083408e-06, "logits/chosen": -1.841796875, "logits/rejected": -1.767578125, "logps/chosen": -918.0, "logps/rejected": -1204.5, "loss": 0.4716796875, "nll_loss": 1.4248046875, "rewards/accuracies": 1.0, "rewards/chosen": 3.10546875, "rewards/margins": 9.109375, "rewards/rejected": -6.0, "step": 120 }, { "epoch": 0.36281859070464767, "grad_norm": 34.75, "learning_rate": 8.859942229291856e-06, "logits/chosen": -1.701171875, "logits/rejected": -1.654296875, "logps/chosen": -1332.375, "logps/rejected": -1418.5, "loss": 0.61279296875, "nll_loss": 1.563232421875, "rewards/accuracies": 1.0, "rewards/chosen": 3.984375, "rewards/margins": 8.2109375, "rewards/rejected": -4.21875, "step": 121 }, { "epoch": 0.3658170914542729, "grad_norm": 56.5, "learning_rate": 8.839402230183e-06, "logits/chosen": -1.666015625, "logits/rejected": -1.634765625, "logps/chosen": -534.5, "logps/rejected": -806.0, "loss": 0.6767578125, "nll_loss": 1.126953125, "rewards/accuracies": 1.0, "rewards/chosen": 2.32421875, "rewards/margins": 6.4921875, "rewards/rejected": -4.1875, "step": 122 }, { "epoch": 0.36881559220389803, "grad_norm": 56.5, "learning_rate": 8.818703112010562e-06, "logits/chosen": -1.52734375, "logits/rejected": -1.5390625, "logps/chosen": -1107.25, "logps/rejected": -1183.25, "loss": 0.6611328125, "nll_loss": 1.7822265625, "rewards/accuracies": 1.0, "rewards/chosen": 2.57421875, "rewards/margins": 7.765625, "rewards/rejected": -5.1875, "step": 123 }, { "epoch": 0.37181409295352325, "grad_norm": 110.5, "learning_rate": 8.797845732622742e-06, "logits/chosen": -1.6796875, "logits/rejected": -1.693359375, "logps/chosen": -2488.0, "logps/rejected": -2250.0, "loss": 0.8564453125, "nll_loss": 2.548828125, "rewards/accuracies": 1.0, "rewards/chosen": 3.375, "rewards/margins": 9.875, "rewards/rejected": -6.5, "step": 124 }, { "epoch": 0.3748125937031484, "grad_norm": 27.0, "learning_rate": 8.776830956426674e-06, "logits/chosen": -1.634765625, "logits/rejected": -1.693359375, "logps/chosen": -1075.0, "logps/rejected": -1270.0, "loss": 0.6455078125, "nll_loss": 1.830078125, "rewards/accuracies": 1.0, "rewards/chosen": 3.212890625, "rewards/margins": 7.96875, "rewards/rejected": -4.7578125, "step": 125 }, { "epoch": 0.3778110944527736, "grad_norm": 26.375, "learning_rate": 8.755659654352599e-06, "logits/chosen": -1.705078125, "logits/rejected": -1.66796875, "logps/chosen": -1637.0, "logps/rejected": -1700.0, "loss": 0.57861328125, "nll_loss": 1.87890625, "rewards/accuracies": 1.0, "rewards/chosen": 3.88671875, "rewards/margins": 10.71875, "rewards/rejected": -6.828125, "step": 126 }, { "epoch": 0.3808095952023988, "grad_norm": 36.0, "learning_rate": 8.734332703817771e-06, "logits/chosen": -1.677734375, "logits/rejected": -1.6875, "logps/chosen": -1456.0, "logps/rejected": -1410.0, "loss": 0.57421875, "nll_loss": 1.71875, "rewards/accuracies": 1.0, "rewards/chosen": 3.90625, "rewards/margins": 9.9375, "rewards/rejected": -6.03125, "step": 127 }, { "epoch": 0.383808095952024, "grad_norm": 34.5, "learning_rate": 8.712850988690094e-06, "logits/chosen": -1.697265625, "logits/rejected": -1.724609375, "logps/chosen": -1388.0, "logps/rejected": -1622.0, "loss": 0.55322265625, "nll_loss": 1.80859375, "rewards/accuracies": 1.0, "rewards/chosen": 5.890625, "rewards/margins": 12.828125, "rewards/rejected": -6.9375, "step": 128 }, { "epoch": 0.3868065967016492, "grad_norm": 46.5, "learning_rate": 8.691215399251489e-06, "logits/chosen": -1.74609375, "logits/rejected": -1.732421875, "logps/chosen": -1243.25, "logps/rejected": -1332.0, "loss": 0.67578125, "nll_loss": 1.51416015625, "rewards/accuracies": 1.0, "rewards/chosen": 2.66015625, "rewards/margins": 6.03125, "rewards/rejected": -3.375, "step": 129 }, { "epoch": 0.38980509745127434, "grad_norm": 89.5, "learning_rate": 8.669426832160997e-06, "logits/chosen": -1.779296875, "logits/rejected": -1.744140625, "logps/chosen": -1364.0, "logps/rejected": -1410.0, "loss": 0.6494140625, "nll_loss": 1.65234375, "rewards/accuracies": 1.0, "rewards/chosen": 4.359375, "rewards/margins": 7.96875, "rewards/rejected": -3.6015625, "step": 130 }, { "epoch": 0.39280359820089955, "grad_norm": 82.5, "learning_rate": 8.647486190417624e-06, "logits/chosen": -1.599609375, "logits/rejected": -1.63671875, "logps/chosen": -1187.5625, "logps/rejected": -1250.25, "loss": 0.7490234375, "nll_loss": 1.5283203125, "rewards/accuracies": 1.0, "rewards/chosen": 4.240234375, "rewards/margins": 7.0546875, "rewards/rejected": -2.8125, "step": 131 }, { "epoch": 0.39580209895052476, "grad_norm": 53.25, "learning_rate": 8.625394383322914e-06, "logits/chosen": -1.677734375, "logits/rejected": -1.73046875, "logps/chosen": -1205.0, "logps/rejected": -1314.0, "loss": 0.8173828125, "nll_loss": 1.82421875, "rewards/accuracies": 1.0, "rewards/chosen": 4.3515625, "rewards/margins": 6.21875, "rewards/rejected": -1.8515625, "step": 132 }, { "epoch": 0.3988005997001499, "grad_norm": 43.5, "learning_rate": 8.603152326443262e-06, "logits/chosen": -1.779296875, "logits/rejected": -1.80078125, "logps/chosen": -1489.0, "logps/rejected": -1477.0, "loss": 0.7080078125, "nll_loss": 1.990234375, "rewards/accuracies": 1.0, "rewards/chosen": 3.50390625, "rewards/margins": 7.46875, "rewards/rejected": -3.97265625, "step": 133 }, { "epoch": 0.4017991004497751, "grad_norm": 74.0, "learning_rate": 8.580760941571968e-06, "logits/chosen": -1.84375, "logits/rejected": -1.875, "logps/chosen": -1790.0, "logps/rejected": -1952.0, "loss": 0.75, "nll_loss": 2.193359375, "rewards/accuracies": 1.0, "rewards/chosen": 3.390625, "rewards/margins": 8.3046875, "rewards/rejected": -4.9140625, "step": 134 }, { "epoch": 0.4047976011994003, "grad_norm": 58.25, "learning_rate": 8.55822115669104e-06, "logits/chosen": -1.59765625, "logits/rejected": -1.734375, "logps/chosen": -1330.125, "logps/rejected": -1292.625, "loss": 0.77734375, "nll_loss": 1.89501953125, "rewards/accuracies": 1.0, "rewards/chosen": 1.947265625, "rewards/margins": 6.953125, "rewards/rejected": -5.015625, "step": 135 }, { "epoch": 0.4077961019490255, "grad_norm": 23.125, "learning_rate": 8.535533905932739e-06, "logits/chosen": -1.751953125, "logits/rejected": -1.7578125, "logps/chosen": -918.0, "logps/rejected": -1206.0, "loss": 0.47705078125, "nll_loss": 1.18603515625, "rewards/accuracies": 1.0, "rewards/chosen": 3.515625, "rewards/margins": 9.53125, "rewards/rejected": -6.015625, "step": 136 }, { "epoch": 0.4107946026986507, "grad_norm": 32.0, "learning_rate": 8.512700129540847e-06, "logits/chosen": -1.65234375, "logits/rejected": -1.6796875, "logps/chosen": -954.625, "logps/rejected": -899.875, "loss": 0.591552734375, "nll_loss": 1.60595703125, "rewards/accuracies": 1.0, "rewards/chosen": 2.78515625, "rewards/margins": 7.3359375, "rewards/rejected": -4.55078125, "step": 137 }, { "epoch": 0.41379310344827586, "grad_norm": 74.5, "learning_rate": 8.489720773831717e-06, "logits/chosen": -1.7421875, "logits/rejected": -1.734375, "logps/chosen": -1874.0, "logps/rejected": -1946.0, "loss": 0.869140625, "nll_loss": 2.39453125, "rewards/accuracies": 1.0, "rewards/chosen": 5.1015625, "rewards/margins": 8.0625, "rewards/rejected": -2.953125, "step": 138 }, { "epoch": 0.41679160419790107, "grad_norm": 28.875, "learning_rate": 8.466596791155055e-06, "logits/chosen": -1.66015625, "logits/rejected": -1.71484375, "logps/chosen": -1147.0, "logps/rejected": -1320.0, "loss": 0.697265625, "nll_loss": 1.8203125, "rewards/accuracies": 1.0, "rewards/chosen": 3.078125, "rewards/margins": 8.9375, "rewards/rejected": -5.859375, "step": 139 }, { "epoch": 0.4197901049475262, "grad_norm": 91.0, "learning_rate": 8.443329139854434e-06, "logits/chosen": -1.673828125, "logits/rejected": -1.646484375, "logps/chosen": -1091.0, "logps/rejected": -1208.0, "loss": 0.505615234375, "nll_loss": 1.451171875, "rewards/accuracies": 1.0, "rewards/chosen": 5.078125, "rewards/margins": 11.875, "rewards/rejected": -6.78125, "step": 140 }, { "epoch": 0.42278860569715143, "grad_norm": 47.5, "learning_rate": 8.419918784227592e-06, "logits/chosen": -1.767578125, "logits/rejected": -1.763671875, "logps/chosen": -1792.0, "logps/rejected": -1844.0, "loss": 0.724609375, "nll_loss": 2.212890625, "rewards/accuracies": 1.0, "rewards/chosen": 4.03125, "rewards/margins": 8.7421875, "rewards/rejected": -4.7109375, "step": 141 }, { "epoch": 0.4257871064467766, "grad_norm": 42.25, "learning_rate": 8.396366694486466e-06, "logits/chosen": -1.599609375, "logits/rejected": -1.6015625, "logps/chosen": -1004.5, "logps/rejected": -1242.0, "loss": 0.5654296875, "nll_loss": 1.5791015625, "rewards/accuracies": 1.0, "rewards/chosen": 3.91796875, "rewards/margins": 10.09375, "rewards/rejected": -6.1796875, "step": 142 }, { "epoch": 0.4287856071964018, "grad_norm": 26.5, "learning_rate": 8.372673846716977e-06, "logits/chosen": -1.80859375, "logits/rejected": -1.810546875, "logps/chosen": -1372.0, "logps/rejected": -1631.0, "loss": 0.625, "nll_loss": 1.96484375, "rewards/accuracies": 1.0, "rewards/chosen": 4.2109375, "rewards/margins": 10.15625, "rewards/rejected": -5.9453125, "step": 143 }, { "epoch": 0.431784107946027, "grad_norm": 46.5, "learning_rate": 8.348841222838579e-06, "logits/chosen": -1.77734375, "logits/rejected": -1.7421875, "logps/chosen": -1485.0, "logps/rejected": -1534.0, "loss": 0.6240234375, "nll_loss": 1.837890625, "rewards/accuracies": 1.0, "rewards/chosen": 4.0703125, "rewards/margins": 8.9765625, "rewards/rejected": -4.90625, "step": 144 }, { "epoch": 0.43478260869565216, "grad_norm": 43.75, "learning_rate": 8.324869810563573e-06, "logits/chosen": -1.716796875, "logits/rejected": -1.693359375, "logps/chosen": -1626.0, "logps/rejected": -1632.0, "loss": 0.6923828125, "nll_loss": 2.09765625, "rewards/accuracies": 1.0, "rewards/chosen": 4.52734375, "rewards/margins": 9.46875, "rewards/rejected": -4.9375, "step": 145 }, { "epoch": 0.43778110944527737, "grad_norm": 65.5, "learning_rate": 8.30076060335616e-06, "logits/chosen": -1.732421875, "logits/rejected": -1.771484375, "logps/chosen": -1584.0, "logps/rejected": -1770.0, "loss": 0.6015625, "nll_loss": 1.904296875, "rewards/accuracies": 1.0, "rewards/chosen": 3.9296875, "rewards/margins": 10.6171875, "rewards/rejected": -6.6875, "step": 146 }, { "epoch": 0.4407796101949025, "grad_norm": 27.5, "learning_rate": 8.276514600391272e-06, "logits/chosen": -1.646484375, "logits/rejected": -1.65625, "logps/chosen": -968.5, "logps/rejected": -1189.0, "loss": 0.5771484375, "nll_loss": 1.681640625, "rewards/accuracies": 1.0, "rewards/chosen": 4.62890625, "rewards/margins": 10.515625, "rewards/rejected": -5.89453125, "step": 147 }, { "epoch": 0.44377811094452774, "grad_norm": 24.125, "learning_rate": 8.25213280651317e-06, "logits/chosen": -1.703125, "logits/rejected": -1.71484375, "logps/chosen": -1256.375, "logps/rejected": -1589.25, "loss": 0.5576171875, "nll_loss": 1.766845703125, "rewards/accuracies": 1.0, "rewards/chosen": 3.91015625, "rewards/margins": 9.640625, "rewards/rejected": -5.7265625, "step": 148 }, { "epoch": 0.44677661169415295, "grad_norm": 65.5, "learning_rate": 8.227616232193794e-06, "logits/chosen": -1.78125, "logits/rejected": -1.7421875, "logps/chosen": -2288.0, "logps/rejected": -2264.0, "loss": 0.783203125, "nll_loss": 2.48828125, "rewards/accuracies": 1.0, "rewards/chosen": 3.859375, "rewards/margins": 10.71875, "rewards/rejected": -6.859375, "step": 149 }, { "epoch": 0.4497751124437781, "grad_norm": 21.25, "learning_rate": 8.202965893490877e-06, "logits/chosen": -1.6640625, "logits/rejected": -1.689453125, "logps/chosen": -500.125, "logps/rejected": -871.0, "loss": 0.44189453125, "nll_loss": 1.038330078125, "rewards/accuracies": 1.0, "rewards/chosen": 2.8203125, "rewards/margins": 8.515625, "rewards/rejected": -5.6875, "step": 150 }, { "epoch": 0.4527736131934033, "grad_norm": 22.75, "learning_rate": 8.178182812005853e-06, "logits/chosen": -1.599609375, "logits/rejected": -1.611328125, "logps/chosen": -722.28125, "logps/rejected": -792.0, "loss": 0.4716796875, "nll_loss": 1.11962890625, "rewards/accuracies": 1.0, "rewards/chosen": 3.11328125, "rewards/margins": 7.09375, "rewards/rejected": -3.984375, "step": 151 }, { "epoch": 0.45577211394302847, "grad_norm": 49.75, "learning_rate": 8.153268014841507e-06, "logits/chosen": -1.673828125, "logits/rejected": -1.673828125, "logps/chosen": -1038.75, "logps/rejected": -1381.0, "loss": 0.568359375, "nll_loss": 1.6015625, "rewards/accuracies": 1.0, "rewards/chosen": 4.640625, "rewards/margins": 8.625, "rewards/rejected": -3.9921875, "step": 152 }, { "epoch": 0.4587706146926537, "grad_norm": 66.0, "learning_rate": 8.128222534559406e-06, "logits/chosen": -1.611328125, "logits/rejected": -1.701171875, "logps/chosen": -1522.203125, "logps/rejected": -1810.75, "loss": 0.57080078125, "nll_loss": 1.6414794921875, "rewards/accuracies": 1.0, "rewards/chosen": 4.83984375, "rewards/margins": 8.53125, "rewards/rejected": -3.6875, "step": 153 }, { "epoch": 0.4617691154422789, "grad_norm": 46.25, "learning_rate": 8.103047409137114e-06, "logits/chosen": -1.728515625, "logits/rejected": -1.7734375, "logps/chosen": -927.875, "logps/rejected": -1013.125, "loss": 0.59033203125, "nll_loss": 1.43798828125, "rewards/accuracies": 1.0, "rewards/chosen": 3.5703125, "rewards/margins": 7.875, "rewards/rejected": -4.3125, "step": 154 }, { "epoch": 0.46476761619190404, "grad_norm": 31.5, "learning_rate": 8.07774368192517e-06, "logits/chosen": -1.708984375, "logits/rejected": -1.728515625, "logps/chosen": -1434.875, "logps/rejected": -1416.0, "loss": 0.615234375, "nll_loss": 1.69482421875, "rewards/accuracies": 1.0, "rewards/chosen": 3.833984375, "rewards/margins": 8.578125, "rewards/rejected": -4.7421875, "step": 155 }, { "epoch": 0.46776611694152925, "grad_norm": 24.0, "learning_rate": 8.052312401603848e-06, "logits/chosen": -1.6328125, "logits/rejected": -1.671875, "logps/chosen": -1212.0, "logps/rejected": -1376.0, "loss": 0.4765625, "nll_loss": 1.5703125, "rewards/accuracies": 1.0, "rewards/chosen": 4.71875, "rewards/margins": 10.140625, "rewards/rejected": -5.421875, "step": 156 }, { "epoch": 0.4707646176911544, "grad_norm": 41.5, "learning_rate": 8.026754622139691e-06, "logits/chosen": -1.7109375, "logits/rejected": -1.71875, "logps/chosen": -1130.0, "logps/rejected": -1293.0, "loss": 0.55859375, "nll_loss": 1.576171875, "rewards/accuracies": 1.0, "rewards/chosen": 3.4609375, "rewards/margins": 10.34375, "rewards/rejected": -6.8828125, "step": 157 }, { "epoch": 0.4737631184407796, "grad_norm": 50.5, "learning_rate": 8.001071402741843e-06, "logits/chosen": -1.732421875, "logits/rejected": -1.703125, "logps/chosen": -862.0, "logps/rejected": -1245.0, "loss": 0.5537109375, "nll_loss": 1.369140625, "rewards/accuracies": 1.0, "rewards/chosen": 3.38671875, "rewards/margins": 8.34375, "rewards/rejected": -4.96875, "step": 158 }, { "epoch": 0.4767616191904048, "grad_norm": 21.0, "learning_rate": 7.975263807818136e-06, "logits/chosen": -1.72265625, "logits/rejected": -1.69921875, "logps/chosen": -802.625, "logps/rejected": -869.75, "loss": 0.49462890625, "nll_loss": 1.310546875, "rewards/accuracies": 1.0, "rewards/chosen": 2.84765625, "rewards/margins": 7.1875, "rewards/rejected": -4.328125, "step": 159 }, { "epoch": 0.47976011994003, "grad_norm": 57.0, "learning_rate": 7.949332906930995e-06, "logits/chosen": -1.603515625, "logits/rejected": -1.630859375, "logps/chosen": -857.0, "logps/rejected": -1061.0, "loss": 0.62939453125, "nll_loss": 1.65625, "rewards/accuracies": 1.0, "rewards/chosen": 4.265625, "rewards/margins": 7.6953125, "rewards/rejected": -3.4375, "step": 160 }, { "epoch": 0.4827586206896552, "grad_norm": 45.25, "learning_rate": 7.923279774753092e-06, "logits/chosen": -1.73046875, "logits/rejected": -1.6796875, "logps/chosen": -1773.0, "logps/rejected": -1804.0, "loss": 0.8701171875, "nll_loss": 1.921875, "rewards/accuracies": 1.0, "rewards/chosen": 3.21484375, "rewards/margins": 8.25, "rewards/rejected": -5.03125, "step": 161 }, { "epoch": 0.48575712143928035, "grad_norm": 32.0, "learning_rate": 7.897105491022819e-06, "logits/chosen": -1.64453125, "logits/rejected": -1.630859375, "logps/chosen": -1144.625, "logps/rejected": -1312.0, "loss": 0.4794921875, "nll_loss": 1.552734375, "rewards/accuracies": 1.0, "rewards/chosen": 4.29296875, "rewards/margins": 10.84375, "rewards/rejected": -6.5546875, "step": 162 }, { "epoch": 0.48875562218890556, "grad_norm": 29.5, "learning_rate": 7.870811140499543e-06, "logits/chosen": -1.724609375, "logits/rejected": -1.64453125, "logps/chosen": -1252.0, "logps/rejected": -1311.0, "loss": 0.5830078125, "nll_loss": 1.84375, "rewards/accuracies": 1.0, "rewards/chosen": 3.7890625, "rewards/margins": 13.140625, "rewards/rejected": -9.3671875, "step": 163 }, { "epoch": 0.4917541229385307, "grad_norm": 37.5, "learning_rate": 7.844397812918637e-06, "logits/chosen": -1.81640625, "logits/rejected": -1.787109375, "logps/chosen": -1378.0, "logps/rejected": -1546.0, "loss": 0.6435546875, "nll_loss": 1.900390625, "rewards/accuracies": 1.0, "rewards/chosen": 3.8671875, "rewards/margins": 7.921875, "rewards/rejected": -4.0546875, "step": 164 }, { "epoch": 0.4947526236881559, "grad_norm": 33.0, "learning_rate": 7.817866602946326e-06, "logits/chosen": -1.654296875, "logits/rejected": -1.630859375, "logps/chosen": -870.5, "logps/rejected": -933.75, "loss": 0.5341796875, "nll_loss": 1.21484375, "rewards/accuracies": 1.0, "rewards/chosen": 3.765625, "rewards/margins": 7.3828125, "rewards/rejected": -3.6171875, "step": 165 }, { "epoch": 0.49775112443778113, "grad_norm": 67.0, "learning_rate": 7.791218610134324e-06, "logits/chosen": -1.751953125, "logits/rejected": -1.76953125, "logps/chosen": -1784.0, "logps/rejected": -1921.0, "loss": 0.623046875, "nll_loss": 1.955078125, "rewards/accuracies": 1.0, "rewards/chosen": 5.10546875, "rewards/margins": 9.359375, "rewards/rejected": -4.25, "step": 166 }, { "epoch": 0.5007496251874063, "grad_norm": 73.0, "learning_rate": 7.764454938874252e-06, "logits/chosen": -1.78125, "logits/rejected": -1.853515625, "logps/chosen": -1023.0, "logps/rejected": -1186.5, "loss": 0.6767578125, "nll_loss": 1.677734375, "rewards/accuracies": 1.0, "rewards/chosen": 3.92578125, "rewards/margins": 7.4296875, "rewards/rejected": -3.5, "step": 167 }, { "epoch": 0.5037481259370314, "grad_norm": 15.0625, "learning_rate": 7.737576698351878e-06, "logits/chosen": -1.83984375, "logits/rejected": -1.79296875, "logps/chosen": -2036.0, "logps/rejected": -2070.0, "loss": 0.5791015625, "nll_loss": 2.158203125, "rewards/accuracies": 1.0, "rewards/chosen": 5.125, "rewards/margins": 11.875, "rewards/rejected": -6.75, "step": 168 }, { "epoch": 0.5067466266866567, "grad_norm": 15.4375, "learning_rate": 7.710585002501145e-06, "logits/chosen": -1.70703125, "logits/rejected": -1.71484375, "logps/chosen": -807.125, "logps/rejected": -880.5, "loss": 0.525390625, "nll_loss": 1.0146484375, "rewards/accuracies": 1.0, "rewards/chosen": 2.58203125, "rewards/margins": 7.171875, "rewards/rejected": -4.5859375, "step": 169 }, { "epoch": 0.5097451274362819, "grad_norm": 45.5, "learning_rate": 7.683480969958005e-06, "logits/chosen": -1.740234375, "logits/rejected": -1.728515625, "logps/chosen": -1052.0, "logps/rejected": -1072.0, "loss": 0.609375, "nll_loss": 1.5546875, "rewards/accuracies": 1.0, "rewards/chosen": 2.140625, "rewards/margins": 8.421875, "rewards/rejected": -6.28125, "step": 170 }, { "epoch": 0.512743628185907, "grad_norm": 17.625, "learning_rate": 7.656265724014054e-06, "logits/chosen": -1.615234375, "logits/rejected": -1.611328125, "logps/chosen": -435.5, "logps/rejected": -552.125, "loss": 0.466796875, "nll_loss": 0.8681640625, "rewards/accuracies": 1.0, "rewards/chosen": 2.078125, "rewards/margins": 6.640625, "rewards/rejected": -4.5625, "step": 171 }, { "epoch": 0.5157421289355323, "grad_norm": 55.5, "learning_rate": 7.628940392569995e-06, "logits/chosen": -1.654296875, "logits/rejected": -1.662109375, "logps/chosen": -784.25, "logps/rejected": -1066.0, "loss": 0.6005859375, "nll_loss": 1.3203125, "rewards/accuracies": 1.0, "rewards/chosen": 4.77734375, "rewards/margins": 8.5, "rewards/rejected": -3.7265625, "step": 172 }, { "epoch": 0.5187406296851574, "grad_norm": 70.5, "learning_rate": 7.601506108088874e-06, "logits/chosen": -1.79296875, "logits/rejected": -1.8046875, "logps/chosen": -1818.0, "logps/rejected": -1958.0, "loss": 0.59619140625, "nll_loss": 1.869140625, "rewards/accuracies": 1.0, "rewards/chosen": 5.65625, "rewards/margins": 9.71875, "rewards/rejected": -4.0625, "step": 173 }, { "epoch": 0.5217391304347826, "grad_norm": 35.75, "learning_rate": 7.5739640075491546e-06, "logits/chosen": -1.806640625, "logits/rejected": -1.720703125, "logps/chosen": -1321.0, "logps/rejected": -1439.5, "loss": 0.62744140625, "nll_loss": 1.97265625, "rewards/accuracies": 1.0, "rewards/chosen": 4.1015625, "rewards/margins": 9.0703125, "rewards/rejected": -4.96875, "step": 174 }, { "epoch": 0.5247376311844077, "grad_norm": 28.0, "learning_rate": 7.546315232397601e-06, "logits/chosen": -1.59375, "logits/rejected": -1.69921875, "logps/chosen": -1306.0, "logps/rejected": -1536.0, "loss": 0.5693359375, "nll_loss": 1.873046875, "rewards/accuracies": 1.0, "rewards/chosen": 4.2890625, "rewards/margins": 10.40625, "rewards/rejected": -6.125, "step": 175 }, { "epoch": 0.527736131934033, "grad_norm": 52.75, "learning_rate": 7.518560928501969e-06, "logits/chosen": -1.720703125, "logits/rejected": -1.740234375, "logps/chosen": -1345.0, "logps/rejected": -1432.0, "loss": 0.65283203125, "nll_loss": 1.833984375, "rewards/accuracies": 1.0, "rewards/chosen": 4.103515625, "rewards/margins": 12.609375, "rewards/rejected": -8.51171875, "step": 176 }, { "epoch": 0.5307346326836582, "grad_norm": 40.0, "learning_rate": 7.4907022461035125e-06, "logits/chosen": -1.638671875, "logits/rejected": -1.63671875, "logps/chosen": -1102.25, "logps/rejected": -1063.75, "loss": 0.5537109375, "nll_loss": 1.18505859375, "rewards/accuracies": 1.0, "rewards/chosen": 3.08984375, "rewards/margins": 10.4921875, "rewards/rejected": -7.40625, "step": 177 }, { "epoch": 0.5337331334332833, "grad_norm": 32.5, "learning_rate": 7.462740339769323e-06, "logits/chosen": -1.69140625, "logits/rejected": -1.744140625, "logps/chosen": -1708.0, "logps/rejected": -1704.0, "loss": 0.6611328125, "nll_loss": 2.015625, "rewards/accuracies": 1.0, "rewards/chosen": 4.4375, "rewards/margins": 9.734375, "rewards/rejected": -5.296875, "step": 178 }, { "epoch": 0.5367316341829086, "grad_norm": 48.0, "learning_rate": 7.434676368344469e-06, "logits/chosen": -1.783203125, "logits/rejected": -1.75390625, "logps/chosen": -1914.0, "logps/rejected": -1940.0, "loss": 0.626953125, "nll_loss": 2.044921875, "rewards/accuracies": 1.0, "rewards/chosen": 4.609375, "rewards/margins": 11.484375, "rewards/rejected": -6.875, "step": 179 }, { "epoch": 0.5397301349325337, "grad_norm": 7.40625, "learning_rate": 7.406511494903982e-06, "logits/chosen": -1.61328125, "logits/rejected": -1.55859375, "logps/chosen": -1046.0, "logps/rejected": -1124.0, "loss": 0.498291015625, "nll_loss": 1.625244140625, "rewards/accuracies": 1.0, "rewards/chosen": 4.203125, "rewards/margins": 12.671875, "rewards/rejected": -8.484375, "step": 180 }, { "epoch": 0.5427286356821589, "grad_norm": 21.75, "learning_rate": 7.378246886704638e-06, "logits/chosen": -1.630859375, "logits/rejected": -1.66796875, "logps/chosen": -1207.0, "logps/rejected": -1420.0, "loss": 0.54052734375, "nll_loss": 1.6796875, "rewards/accuracies": 1.0, "rewards/chosen": 3.90625, "rewards/margins": 9.5, "rewards/rejected": -5.5859375, "step": 181 }, { "epoch": 0.545727136431784, "grad_norm": 36.25, "learning_rate": 7.349883715136601e-06, "logits/chosen": -1.59765625, "logits/rejected": -1.599609375, "logps/chosen": -1302.0, "logps/rejected": -1336.0, "loss": 0.6962890625, "nll_loss": 1.96875, "rewards/accuracies": 1.0, "rewards/chosen": 3.068359375, "rewards/margins": 9.5546875, "rewards/rejected": -6.484375, "step": 182 }, { "epoch": 0.5487256371814093, "grad_norm": 11.3125, "learning_rate": 7.321423155674858e-06, "logits/chosen": -1.79296875, "logits/rejected": -1.779296875, "logps/chosen": -1059.0, "logps/rejected": -1222.0, "loss": 0.59765625, "nll_loss": 1.802734375, "rewards/accuracies": 1.0, "rewards/chosen": 3.62890625, "rewards/margins": 8.546875, "rewards/rejected": -4.90625, "step": 183 }, { "epoch": 0.5517241379310345, "grad_norm": 46.25, "learning_rate": 7.292866387830515e-06, "logits/chosen": -1.693359375, "logits/rejected": -1.6015625, "logps/chosen": -1292.375, "logps/rejected": -1353.5625, "loss": 0.67578125, "nll_loss": 1.42626953125, "rewards/accuracies": 1.0, "rewards/chosen": 2.45703125, "rewards/margins": 6.015625, "rewards/rejected": -3.55859375, "step": 184 }, { "epoch": 0.5547226386806596, "grad_norm": 15.0625, "learning_rate": 7.264214595101913e-06, "logits/chosen": -1.61328125, "logits/rejected": -1.615234375, "logps/chosen": -1174.125, "logps/rejected": -1231.0, "loss": 0.4501953125, "nll_loss": 1.28564453125, "rewards/accuracies": 1.0, "rewards/chosen": 3.5625, "rewards/margins": 11.4375, "rewards/rejected": -7.8671875, "step": 185 }, { "epoch": 0.5577211394302849, "grad_norm": 28.25, "learning_rate": 7.235468964925571e-06, "logits/chosen": -1.79296875, "logits/rejected": -1.794921875, "logps/chosen": -761.0625, "logps/rejected": -856.5625, "loss": 0.51318359375, "nll_loss": 1.46435546875, "rewards/accuracies": 1.0, "rewards/chosen": 3.75, "rewards/margins": 8.6953125, "rewards/rejected": -4.953125, "step": 186 }, { "epoch": 0.56071964017991, "grad_norm": 29.875, "learning_rate": 7.206630688626981e-06, "logits/chosen": -1.529296875, "logits/rejected": -1.591796875, "logps/chosen": -1120.25, "logps/rejected": -1366.0, "loss": 0.489013671875, "nll_loss": 1.61376953125, "rewards/accuracies": 1.0, "rewards/chosen": 3.9453125, "rewards/margins": 12.796875, "rewards/rejected": -8.859375, "step": 187 }, { "epoch": 0.5637181409295352, "grad_norm": 8.5625, "learning_rate": 7.177700961371239e-06, "logits/chosen": -1.673828125, "logits/rejected": -1.673828125, "logps/chosen": -423.8125, "logps/rejected": -450.5, "loss": 0.392578125, "nll_loss": 0.90478515625, "rewards/accuracies": 1.0, "rewards/chosen": 2.7578125, "rewards/margins": 9.1640625, "rewards/rejected": -6.3984375, "step": 188 }, { "epoch": 0.5667166416791605, "grad_norm": 47.75, "learning_rate": 7.148680982113502e-06, "logits/chosen": -1.685546875, "logits/rejected": -1.720703125, "logps/chosen": -1484.0, "logps/rejected": -1674.0, "loss": 0.56640625, "nll_loss": 1.900390625, "rewards/accuracies": 1.0, "rewards/chosen": 6.8828125, "rewards/margins": 14.34375, "rewards/rejected": -7.453125, "step": 189 }, { "epoch": 0.5697151424287856, "grad_norm": 72.0, "learning_rate": 7.119571953549305e-06, "logits/chosen": -1.677734375, "logits/rejected": -1.6953125, "logps/chosen": -1128.1875, "logps/rejected": -1395.5, "loss": 0.5537109375, "nll_loss": 1.513671875, "rewards/accuracies": 1.0, "rewards/chosen": 4.984375, "rewards/margins": 10.75, "rewards/rejected": -5.765625, "step": 190 }, { "epoch": 0.5727136431784108, "grad_norm": 45.75, "learning_rate": 7.0903750820647175e-06, "logits/chosen": -1.78515625, "logits/rejected": -1.708984375, "logps/chosen": -858.75, "logps/rejected": -956.5, "loss": 0.50341796875, "nll_loss": 1.38330078125, "rewards/accuracies": 1.0, "rewards/chosen": 3.9375, "rewards/margins": 8.140625, "rewards/rejected": -4.203125, "step": 191 }, { "epoch": 0.5757121439280359, "grad_norm": 22.75, "learning_rate": 7.061091577686349e-06, "logits/chosen": -1.759765625, "logits/rejected": -1.8046875, "logps/chosen": -1055.5, "logps/rejected": -1149.5, "loss": 0.58642578125, "nll_loss": 1.42431640625, "rewards/accuracies": 1.0, "rewards/chosen": 3.828125, "rewards/margins": 9.125, "rewards/rejected": -5.296875, "step": 192 }, { "epoch": 0.5787106446776612, "grad_norm": 75.5, "learning_rate": 7.031722654031192e-06, "logits/chosen": -1.775390625, "logits/rejected": -1.751953125, "logps/chosen": -1746.0, "logps/rejected": -1844.0, "loss": 0.77734375, "nll_loss": 2.291015625, "rewards/accuracies": 1.0, "rewards/chosen": 5.80859375, "rewards/margins": 9.703125, "rewards/rejected": -3.8984375, "step": 193 }, { "epoch": 0.5817091454272864, "grad_norm": 21.875, "learning_rate": 7.002269528256334e-06, "logits/chosen": -1.662109375, "logits/rejected": -1.658203125, "logps/chosen": -1760.0, "logps/rejected": -1872.0, "loss": 0.5302734375, "nll_loss": 1.912109375, "rewards/accuracies": 1.0, "rewards/chosen": 5.296875, "rewards/margins": 17.09375, "rewards/rejected": -11.78125, "step": 194 }, { "epoch": 0.5847076461769115, "grad_norm": 35.0, "learning_rate": 6.972733421008505e-06, "logits/chosen": -1.7109375, "logits/rejected": -1.70703125, "logps/chosen": -1538.0, "logps/rejected": -1708.0, "loss": 0.7744140625, "nll_loss": 2.29296875, "rewards/accuracies": 1.0, "rewards/chosen": 3.828125, "rewards/margins": 9.140625, "rewards/rejected": -5.3203125, "step": 195 }, { "epoch": 0.5877061469265368, "grad_norm": 22.625, "learning_rate": 6.943115556373503e-06, "logits/chosen": -1.63671875, "logits/rejected": -1.650390625, "logps/chosen": -1166.0, "logps/rejected": -1293.0, "loss": 0.470703125, "nll_loss": 1.443359375, "rewards/accuracies": 1.0, "rewards/chosen": 4.28125, "rewards/margins": 10.578125, "rewards/rejected": -6.296875, "step": 196 }, { "epoch": 0.5907046476761619, "grad_norm": 18.75, "learning_rate": 6.913417161825449e-06, "logits/chosen": -1.685546875, "logits/rejected": -1.65625, "logps/chosen": -1334.0, "logps/rejected": -1528.0, "loss": 0.591796875, "nll_loss": 1.921875, "rewards/accuracies": 1.0, "rewards/chosen": 4.296875, "rewards/margins": 11.0390625, "rewards/rejected": -6.7421875, "step": 197 }, { "epoch": 0.5937031484257871, "grad_norm": 49.0, "learning_rate": 6.883639468175926e-06, "logits/chosen": -1.66796875, "logits/rejected": -1.650390625, "logps/chosen": -1457.75, "logps/rejected": -1496.0, "loss": 0.662109375, "nll_loss": 1.81884765625, "rewards/accuracies": 1.0, "rewards/chosen": 3.33203125, "rewards/margins": 11.1796875, "rewards/rejected": -7.8828125, "step": 198 }, { "epoch": 0.5967016491754122, "grad_norm": 40.5, "learning_rate": 6.853783709522963e-06, "logits/chosen": -1.57421875, "logits/rejected": -1.623046875, "logps/chosen": -1197.0, "logps/rejected": -1352.0, "loss": 0.7451171875, "nll_loss": 1.91015625, "rewards/accuracies": 1.0, "rewards/chosen": 2.9375, "rewards/margins": 7.1171875, "rewards/rejected": -4.1796875, "step": 199 }, { "epoch": 0.5997001499250375, "grad_norm": 16.875, "learning_rate": 6.823851123199894e-06, "logits/chosen": -1.716796875, "logits/rejected": -1.69140625, "logps/chosen": -1744.0, "logps/rejected": -1844.0, "loss": 0.56640625, "nll_loss": 2.04296875, "rewards/accuracies": 1.0, "rewards/chosen": 4.82421875, "rewards/margins": 14.609375, "rewards/rejected": -9.78125, "step": 200 }, { "epoch": 0.6026986506746627, "grad_norm": 14.75, "learning_rate": 6.793842949724074e-06, "logits/chosen": -1.6875, "logits/rejected": -1.69921875, "logps/chosen": -1252.15625, "logps/rejected": -1181.1875, "loss": 0.5322265625, "nll_loss": 1.7593994140625, "rewards/accuracies": 1.0, "rewards/chosen": 3.5, "rewards/margins": 10.671875, "rewards/rejected": -7.1640625, "step": 201 }, { "epoch": 0.6056971514242878, "grad_norm": 33.75, "learning_rate": 6.763760432745475e-06, "logits/chosen": -1.76171875, "logits/rejected": -1.818359375, "logps/chosen": -1228.71875, "logps/rejected": -1509.5, "loss": 0.57421875, "nll_loss": 1.74951171875, "rewards/accuracies": 1.0, "rewards/chosen": 4.51953125, "rewards/margins": 10.640625, "rewards/rejected": -6.1171875, "step": 202 }, { "epoch": 0.6086956521739131, "grad_norm": 23.375, "learning_rate": 6.733604818995133e-06, "logits/chosen": -1.7109375, "logits/rejected": -1.609375, "logps/chosen": -875.078125, "logps/rejected": -1057.0, "loss": 0.46142578125, "nll_loss": 1.115234375, "rewards/accuracies": 1.0, "rewards/chosen": 4.4140625, "rewards/margins": 9.9765625, "rewards/rejected": -5.5546875, "step": 203 }, { "epoch": 0.6116941529235382, "grad_norm": 58.0, "learning_rate": 6.703377358233489e-06, "logits/chosen": -1.68359375, "logits/rejected": -1.712890625, "logps/chosen": -1729.0, "logps/rejected": -1954.0, "loss": 0.62890625, "nll_loss": 1.9140625, "rewards/accuracies": 1.0, "rewards/chosen": 3.359375, "rewards/margins": 9.921875, "rewards/rejected": -6.5625, "step": 204 }, { "epoch": 0.6146926536731634, "grad_norm": 62.5, "learning_rate": 6.673079303198591e-06, "logits/chosen": -1.759765625, "logits/rejected": -1.751953125, "logps/chosen": -1221.0, "logps/rejected": -1140.0, "loss": 0.61376953125, "nll_loss": 1.833984375, "rewards/accuracies": 1.0, "rewards/chosen": 3.6484375, "rewards/margins": 10.375, "rewards/rejected": -6.734375, "step": 205 }, { "epoch": 0.6176911544227887, "grad_norm": 8.75, "learning_rate": 6.6427119095541745e-06, "logits/chosen": -1.783203125, "logits/rejected": -1.7734375, "logps/chosen": -1370.0, "logps/rejected": -1515.0, "loss": 0.56201171875, "nll_loss": 1.927734375, "rewards/accuracies": 1.0, "rewards/chosen": 5.203125, "rewards/margins": 11.234375, "rewards/rejected": -6.046875, "step": 206 }, { "epoch": 0.6206896551724138, "grad_norm": 33.0, "learning_rate": 6.612276435837622e-06, "logits/chosen": -1.783203125, "logits/rejected": -1.75390625, "logps/chosen": -1521.0, "logps/rejected": -1367.0, "loss": 0.595703125, "nll_loss": 2.0, "rewards/accuracies": 1.0, "rewards/chosen": 5.0625, "rewards/margins": 9.640625, "rewards/rejected": -4.578125, "step": 207 }, { "epoch": 0.623688155922039, "grad_norm": 38.0, "learning_rate": 6.58177414340781e-06, "logits/chosen": -1.65625, "logits/rejected": -1.638671875, "logps/chosen": -1010.5, "logps/rejected": -1075.75, "loss": 0.405029296875, "nll_loss": 1.109619140625, "rewards/accuracies": 1.0, "rewards/chosen": 3.84375, "rewards/margins": 11.75, "rewards/rejected": -7.890625, "step": 208 }, { "epoch": 0.6266866566716641, "grad_norm": 50.75, "learning_rate": 6.551206296392827e-06, "logits/chosen": -1.697265625, "logits/rejected": -1.7265625, "logps/chosen": -1423.0, "logps/rejected": -1568.0, "loss": 0.5517578125, "nll_loss": 1.68359375, "rewards/accuracies": 1.0, "rewards/chosen": 5.03125, "rewards/margins": 11.265625, "rewards/rejected": -6.234375, "step": 209 }, { "epoch": 0.6296851574212894, "grad_norm": 52.25, "learning_rate": 6.520574161637591e-06, "logits/chosen": -1.728515625, "logits/rejected": -1.7265625, "logps/chosen": -1304.75, "logps/rejected": -1336.625, "loss": 0.55419921875, "nll_loss": 1.566162109375, "rewards/accuracies": 1.0, "rewards/chosen": 4.375, "rewards/margins": 9.59375, "rewards/rejected": -5.2109375, "step": 210 }, { "epoch": 0.6326836581709145, "grad_norm": 54.25, "learning_rate": 6.4898790086513366e-06, "logits/chosen": -1.63671875, "logits/rejected": -1.66796875, "logps/chosen": -813.03125, "logps/rejected": -885.25, "loss": 0.56689453125, "nll_loss": 1.57763671875, "rewards/accuracies": 1.0, "rewards/chosen": 3.64453125, "rewards/margins": 8.28125, "rewards/rejected": -4.625, "step": 211 }, { "epoch": 0.6356821589205397, "grad_norm": 33.0, "learning_rate": 6.459122109555011e-06, "logits/chosen": -1.734375, "logits/rejected": -1.67578125, "logps/chosen": -1351.125, "logps/rejected": -1525.0, "loss": 0.625, "nll_loss": 1.5732421875, "rewards/accuracies": 1.0, "rewards/chosen": 3.296875, "rewards/margins": 8.03125, "rewards/rejected": -4.73046875, "step": 212 }, { "epoch": 0.638680659670165, "grad_norm": 45.25, "learning_rate": 6.42830473902855e-06, "logits/chosen": -1.771484375, "logits/rejected": -1.740234375, "logps/chosen": -1980.0, "logps/rejected": -2148.0, "loss": 0.6806640625, "nll_loss": 2.06640625, "rewards/accuracies": 1.0, "rewards/chosen": 4.75, "rewards/margins": 11.125, "rewards/rejected": -6.375, "step": 213 }, { "epoch": 0.6416791604197901, "grad_norm": 26.75, "learning_rate": 6.397428174258048e-06, "logits/chosen": -1.681640625, "logits/rejected": -1.775390625, "logps/chosen": -1752.0, "logps/rejected": -1716.0, "loss": 0.6748046875, "nll_loss": 2.41796875, "rewards/accuracies": 1.0, "rewards/chosen": 4.921875, "rewards/margins": 10.828125, "rewards/rejected": -5.90625, "step": 214 }, { "epoch": 0.6446776611694153, "grad_norm": 95.0, "learning_rate": 6.3664936948828296e-06, "logits/chosen": -1.806640625, "logits/rejected": -1.8125, "logps/chosen": -1670.0, "logps/rejected": -1818.0, "loss": 0.6787109375, "nll_loss": 1.974609375, "rewards/accuracies": 1.0, "rewards/chosen": 2.85546875, "rewards/margins": 10.8828125, "rewards/rejected": -8.03125, "step": 215 }, { "epoch": 0.6476761619190404, "grad_norm": 12.75, "learning_rate": 6.335502582942409e-06, "logits/chosen": -1.712890625, "logits/rejected": -1.685546875, "logps/chosen": -991.0, "logps/rejected": -1086.0, "loss": 0.44482421875, "nll_loss": 1.37890625, "rewards/accuracies": 1.0, "rewards/chosen": 5.5625, "rewards/margins": 13.9375, "rewards/rejected": -8.390625, "step": 216 }, { "epoch": 0.6506746626686657, "grad_norm": 32.5, "learning_rate": 6.304456122823377e-06, "logits/chosen": -1.841796875, "logits/rejected": -1.802734375, "logps/chosen": -1886.0, "logps/rejected": -2042.0, "loss": 0.6201171875, "nll_loss": 2.13671875, "rewards/accuracies": 1.0, "rewards/chosen": 5.75, "rewards/margins": 10.859375, "rewards/rejected": -5.109375, "step": 217 }, { "epoch": 0.6536731634182908, "grad_norm": 9.5, "learning_rate": 6.273355601206143e-06, "logits/chosen": -1.8046875, "logits/rejected": -1.783203125, "logps/chosen": -834.5, "logps/rejected": -1077.5, "loss": 0.545654296875, "nll_loss": 1.71435546875, "rewards/accuracies": 1.0, "rewards/chosen": 4.46875, "rewards/margins": 10.46875, "rewards/rejected": -6.015625, "step": 218 }, { "epoch": 0.656671664167916, "grad_norm": 33.5, "learning_rate": 6.24220230701164e-06, "logits/chosen": -1.83984375, "logits/rejected": -1.80078125, "logps/chosen": -1300.0, "logps/rejected": -1638.0, "loss": 0.58984375, "nll_loss": 1.958984375, "rewards/accuracies": 1.0, "rewards/chosen": 5.875, "rewards/margins": 12.28125, "rewards/rejected": -6.40625, "step": 219 }, { "epoch": 0.6596701649175413, "grad_norm": 38.5, "learning_rate": 6.210997531347879e-06, "logits/chosen": -1.685546875, "logits/rejected": -1.69921875, "logps/chosen": -1680.0, "logps/rejected": -1700.0, "loss": 0.5849609375, "nll_loss": 1.943359375, "rewards/accuracies": 1.0, "rewards/chosen": 5.421875, "rewards/margins": 14.34375, "rewards/rejected": -8.921875, "step": 220 }, { "epoch": 0.6626686656671664, "grad_norm": 32.25, "learning_rate": 6.179742567456464e-06, "logits/chosen": -1.583984375, "logits/rejected": -1.58203125, "logps/chosen": -879.25, "logps/rejected": -962.5, "loss": 0.52294921875, "nll_loss": 1.542236328125, "rewards/accuracies": 1.0, "rewards/chosen": 3.0234375, "rewards/margins": 9.5390625, "rewards/rejected": -6.515625, "step": 221 }, { "epoch": 0.6656671664167916, "grad_norm": 43.75, "learning_rate": 6.148438710658979e-06, "logits/chosen": -1.759765625, "logits/rejected": -1.759765625, "logps/chosen": -1223.0, "logps/rejected": -1305.75, "loss": 0.61279296875, "nll_loss": 1.720703125, "rewards/accuracies": 1.0, "rewards/chosen": 3.8828125, "rewards/margins": 9.3203125, "rewards/rejected": -5.4453125, "step": 222 }, { "epoch": 0.6686656671664168, "grad_norm": 65.0, "learning_rate": 6.117087258303314e-06, "logits/chosen": -1.79296875, "logits/rejected": -1.755859375, "logps/chosen": -1077.0, "logps/rejected": -1361.0, "loss": 0.68310546875, "nll_loss": 1.5771484375, "rewards/accuracies": 1.0, "rewards/chosen": 3.59375, "rewards/margins": 8.921875, "rewards/rejected": -5.328125, "step": 223 }, { "epoch": 0.671664167916042, "grad_norm": 44.75, "learning_rate": 6.085689509709893e-06, "logits/chosen": -1.837890625, "logits/rejected": -1.791015625, "logps/chosen": -1747.0, "logps/rejected": -1970.0, "loss": 0.59716796875, "nll_loss": 2.078125, "rewards/accuracies": 1.0, "rewards/chosen": 4.609375, "rewards/margins": 12.265625, "rewards/rejected": -7.65625, "step": 224 }, { "epoch": 0.6746626686656672, "grad_norm": 31.125, "learning_rate": 6.0542467661178325e-06, "logits/chosen": -1.630859375, "logits/rejected": -1.771484375, "logps/chosen": -1452.0, "logps/rejected": -1715.0, "loss": 0.5498046875, "nll_loss": 1.876953125, "rewards/accuracies": 1.0, "rewards/chosen": 5.2890625, "rewards/margins": 11.5, "rewards/rejected": -6.203125, "step": 225 }, { "epoch": 0.6776611694152923, "grad_norm": 49.75, "learning_rate": 6.022760330631006e-06, "logits/chosen": -1.693359375, "logits/rejected": -1.6484375, "logps/chosen": -782.25, "logps/rejected": -844.75, "loss": 0.5302734375, "nll_loss": 1.40380859375, "rewards/accuracies": 1.0, "rewards/chosen": 3.888671875, "rewards/margins": 9.3828125, "rewards/rejected": -5.484375, "step": 226 }, { "epoch": 0.6806596701649176, "grad_norm": 10.1875, "learning_rate": 5.991231508164037e-06, "logits/chosen": -1.751953125, "logits/rejected": -1.701171875, "logps/chosen": -1448.0, "logps/rejected": -1553.0, "loss": 0.689453125, "nll_loss": 2.484375, "rewards/accuracies": 1.0, "rewards/chosen": 4.95703125, "rewards/margins": 10.9375, "rewards/rejected": -6.0, "step": 227 }, { "epoch": 0.6836581709145427, "grad_norm": 30.25, "learning_rate": 5.959661605388229e-06, "logits/chosen": -1.77734375, "logits/rejected": -1.75, "logps/chosen": -1412.0, "logps/rejected": -1804.0, "loss": 0.5830078125, "nll_loss": 1.7890625, "rewards/accuracies": 1.0, "rewards/chosen": 4.83984375, "rewards/margins": 12.15625, "rewards/rejected": -7.3125, "step": 228 }, { "epoch": 0.6866566716641679, "grad_norm": 24.5, "learning_rate": 5.928051930677404e-06, "logits/chosen": -1.6953125, "logits/rejected": -1.6640625, "logps/chosen": -1169.25, "logps/rejected": -1307.125, "loss": 0.5185546875, "nll_loss": 1.71630859375, "rewards/accuracies": 1.0, "rewards/chosen": 4.76953125, "rewards/margins": 13.53125, "rewards/rejected": -8.7890625, "step": 229 }, { "epoch": 0.6896551724137931, "grad_norm": 28.25, "learning_rate": 5.896403794053679e-06, "logits/chosen": -1.7109375, "logits/rejected": -1.802734375, "logps/chosen": -1612.0, "logps/rejected": -1782.0, "loss": 0.626953125, "nll_loss": 2.037109375, "rewards/accuracies": 1.0, "rewards/chosen": 5.171875, "rewards/margins": 14.265625, "rewards/rejected": -9.09375, "step": 230 }, { "epoch": 0.6926536731634183, "grad_norm": 33.75, "learning_rate": 5.864718507133176e-06, "logits/chosen": -1.818359375, "logits/rejected": -1.8515625, "logps/chosen": -812.0, "logps/rejected": -1038.0, "loss": 0.642578125, "nll_loss": 1.931640625, "rewards/accuracies": 1.0, "rewards/chosen": 3.5703125, "rewards/margins": 8.421875, "rewards/rejected": -4.84375, "step": 231 }, { "epoch": 0.6956521739130435, "grad_norm": 22.5, "learning_rate": 5.83299738307166e-06, "logits/chosen": -1.5703125, "logits/rejected": -1.6640625, "logps/chosen": -871.484375, "logps/rejected": -1067.5, "loss": 0.63232421875, "nll_loss": 1.5755615234375, "rewards/accuracies": 0.875, "rewards/chosen": 3.734375, "rewards/margins": 8.78125, "rewards/rejected": -5.0390625, "step": 232 }, { "epoch": 0.6986506746626686, "grad_norm": 19.0, "learning_rate": 5.801241736510128e-06, "logits/chosen": -1.720703125, "logits/rejected": -1.76953125, "logps/chosen": -1410.0, "logps/rejected": -1494.0, "loss": 0.7099609375, "nll_loss": 2.193359375, "rewards/accuracies": 1.0, "rewards/chosen": 4.484375, "rewards/margins": 11.015625, "rewards/rejected": -6.53125, "step": 233 }, { "epoch": 0.7016491754122939, "grad_norm": 47.0, "learning_rate": 5.76945288352031e-06, "logits/chosen": -1.763671875, "logits/rejected": -1.798828125, "logps/chosen": -1369.0, "logps/rejected": -1628.0, "loss": 0.57958984375, "nll_loss": 1.720703125, "rewards/accuracies": 1.0, "rewards/chosen": 3.875, "rewards/margins": 10.578125, "rewards/rejected": -6.703125, "step": 234 }, { "epoch": 0.704647676161919, "grad_norm": 12.5625, "learning_rate": 5.7376321415501356e-06, "logits/chosen": -1.826171875, "logits/rejected": -1.80859375, "logps/chosen": -1571.0, "logps/rejected": -1778.0, "loss": 0.53076171875, "nll_loss": 1.9375, "rewards/accuracies": 1.0, "rewards/chosen": 4.6171875, "rewards/margins": 10.421875, "rewards/rejected": -5.8046875, "step": 235 }, { "epoch": 0.7076461769115442, "grad_norm": 22.0, "learning_rate": 5.7057808293691305e-06, "logits/chosen": -1.681640625, "logits/rejected": -1.68359375, "logps/chosen": -918.0, "logps/rejected": -1000.0, "loss": 0.62646484375, "nll_loss": 1.724609375, "rewards/accuracies": 1.0, "rewards/chosen": 3.3359375, "rewards/margins": 9.421875, "rewards/rejected": -6.078125, "step": 236 }, { "epoch": 0.7106446776611695, "grad_norm": 23.875, "learning_rate": 5.67390026701377e-06, "logits/chosen": -1.796875, "logits/rejected": -1.7890625, "logps/chosen": -1216.0, "logps/rejected": -1272.0, "loss": 0.5439453125, "nll_loss": 1.720703125, "rewards/accuracies": 1.0, "rewards/chosen": 4.7890625, "rewards/margins": 11.140625, "rewards/rejected": -6.34375, "step": 237 }, { "epoch": 0.7136431784107946, "grad_norm": 38.0, "learning_rate": 5.641991775732756e-06, "logits/chosen": -1.734375, "logits/rejected": -1.685546875, "logps/chosen": -1150.0, "logps/rejected": -1274.0, "loss": 0.64697265625, "nll_loss": 1.87109375, "rewards/accuracies": 1.0, "rewards/chosen": 3.52734375, "rewards/margins": 9.6328125, "rewards/rejected": -6.1015625, "step": 238 }, { "epoch": 0.7166416791604198, "grad_norm": 29.0, "learning_rate": 5.610056677932274e-06, "logits/chosen": -1.525390625, "logits/rejected": -1.54296875, "logps/chosen": -524.6875, "logps/rejected": -573.625, "loss": 0.39794921875, "nll_loss": 0.998779296875, "rewards/accuracies": 1.0, "rewards/chosen": 2.9140625, "rewards/margins": 8.484375, "rewards/rejected": -5.5625, "step": 239 }, { "epoch": 0.719640179910045, "grad_norm": 30.5, "learning_rate": 5.5780962971211795e-06, "logits/chosen": -1.6953125, "logits/rejected": -1.71484375, "logps/chosen": -1165.0, "logps/rejected": -1233.0, "loss": 0.5751953125, "nll_loss": 1.763671875, "rewards/accuracies": 1.0, "rewards/chosen": 3.6953125, "rewards/margins": 9.40625, "rewards/rejected": -5.71875, "step": 240 }, { "epoch": 0.7226386806596702, "grad_norm": 45.5, "learning_rate": 5.546111957856155e-06, "logits/chosen": -1.814453125, "logits/rejected": -1.849609375, "logps/chosen": -2036.0, "logps/rejected": -2018.0, "loss": 0.685546875, "nll_loss": 2.298828125, "rewards/accuracies": 1.0, "rewards/chosen": 5.125, "rewards/margins": 10.25, "rewards/rejected": -5.125, "step": 241 }, { "epoch": 0.7256371814092953, "grad_norm": 37.5, "learning_rate": 5.514104985686802e-06, "logits/chosen": -1.7421875, "logits/rejected": -1.703125, "logps/chosen": -1124.25, "logps/rejected": -1128.5, "loss": 0.54736328125, "nll_loss": 1.5068359375, "rewards/accuracies": 1.0, "rewards/chosen": 3.34375, "rewards/margins": 10.578125, "rewards/rejected": -7.25, "step": 242 }, { "epoch": 0.7286356821589205, "grad_norm": 26.5, "learning_rate": 5.482076707100723e-06, "logits/chosen": -1.69140625, "logits/rejected": -1.65234375, "logps/chosen": -1042.1875, "logps/rejected": -1090.25, "loss": 0.49267578125, "nll_loss": 1.499267578125, "rewards/accuracies": 1.0, "rewards/chosen": 3.7265625, "rewards/margins": 8.9765625, "rewards/rejected": -5.25, "step": 243 }, { "epoch": 0.7316341829085458, "grad_norm": 9.0625, "learning_rate": 5.4500284494685275e-06, "logits/chosen": -1.763671875, "logits/rejected": -1.833984375, "logps/chosen": -1126.0, "logps/rejected": -1384.0, "loss": 0.4873046875, "nll_loss": 1.6796875, "rewards/accuracies": 1.0, "rewards/chosen": 5.1875, "rewards/margins": 13.546875, "rewards/rejected": -8.375, "step": 244 }, { "epoch": 0.7346326836581709, "grad_norm": 13.0, "learning_rate": 5.417961540988837e-06, "logits/chosen": -1.640625, "logits/rejected": -1.697265625, "logps/chosen": -1245.125, "logps/rejected": -1597.5, "loss": 0.43212890625, "nll_loss": 1.4794921875, "rewards/accuracies": 1.0, "rewards/chosen": 5.4609375, "rewards/margins": 13.46875, "rewards/rejected": -8.0, "step": 245 }, { "epoch": 0.7376311844077961, "grad_norm": 16.5, "learning_rate": 5.385877310633233e-06, "logits/chosen": -1.69921875, "logits/rejected": -1.68359375, "logps/chosen": -489.5, "logps/rejected": -786.5, "loss": 0.345703125, "nll_loss": 0.82373046875, "rewards/accuracies": 1.0, "rewards/chosen": 3.4609375, "rewards/margins": 8.8125, "rewards/rejected": -5.359375, "step": 246 }, { "epoch": 0.7406296851574213, "grad_norm": 35.0, "learning_rate": 5.353777088091177e-06, "logits/chosen": -1.607421875, "logits/rejected": -1.59765625, "logps/chosen": -1627.0, "logps/rejected": -1680.0, "loss": 0.611328125, "nll_loss": 2.056640625, "rewards/accuracies": 1.0, "rewards/chosen": 6.4296875, "rewards/margins": 14.265625, "rewards/rejected": -7.84375, "step": 247 }, { "epoch": 0.7436281859070465, "grad_norm": 23.0, "learning_rate": 5.321662203714909e-06, "logits/chosen": -1.68359375, "logits/rejected": -1.73046875, "logps/chosen": -1636.0, "logps/rejected": -1757.0, "loss": 0.54150390625, "nll_loss": 1.890625, "rewards/accuracies": 1.0, "rewards/chosen": 5.828125, "rewards/margins": 12.703125, "rewards/rejected": -6.875, "step": 248 }, { "epoch": 0.7466266866566716, "grad_norm": 25.625, "learning_rate": 5.289533988464307e-06, "logits/chosen": -1.625, "logits/rejected": -1.5517578125, "logps/chosen": -1512.875, "logps/rejected": -1639.625, "loss": 0.66015625, "nll_loss": 1.72509765625, "rewards/accuracies": 0.875, "rewards/chosen": 4.1015625, "rewards/margins": 10.1484375, "rewards/rejected": -6.0546875, "step": 249 }, { "epoch": 0.7496251874062968, "grad_norm": 6.125, "learning_rate": 5.257393773851733e-06, "logits/chosen": -1.6796875, "logits/rejected": -1.658203125, "logps/chosen": -1528.0, "logps/rejected": -1630.0, "loss": 0.494140625, "nll_loss": 1.8515625, "rewards/accuracies": 1.0, "rewards/chosen": 5.734375, "rewards/margins": 13.921875, "rewards/rejected": -8.1875, "step": 250 }, { "epoch": 0.7526236881559221, "grad_norm": 60.75, "learning_rate": 5.2252428918868446e-06, "logits/chosen": -1.69140625, "logits/rejected": -1.66796875, "logps/chosen": -1806.0, "logps/rejected": -1942.0, "loss": 0.7265625, "nll_loss": 2.3203125, "rewards/accuracies": 1.0, "rewards/chosen": 5.3828125, "rewards/margins": 9.9609375, "rewards/rejected": -4.578125, "step": 251 }, { "epoch": 0.7556221889055472, "grad_norm": 22.25, "learning_rate": 5.193082675021393e-06, "logits/chosen": -1.73046875, "logits/rejected": -1.712890625, "logps/chosen": -2152.0, "logps/rejected": -2320.0, "loss": 0.63037109375, "nll_loss": 2.302734375, "rewards/accuracies": 1.0, "rewards/chosen": 6.28125, "rewards/margins": 11.984375, "rewards/rejected": -5.703125, "step": 252 }, { "epoch": 0.7586206896551724, "grad_norm": 62.25, "learning_rate": 5.160914456094005e-06, "logits/chosen": -1.736328125, "logits/rejected": -1.724609375, "logps/chosen": -615.0, "logps/rejected": -682.625, "loss": 0.571533203125, "nll_loss": 1.18505859375, "rewards/accuracies": 1.0, "rewards/chosen": 3.45703125, "rewards/margins": 7.4140625, "rewards/rejected": -3.96875, "step": 253 }, { "epoch": 0.7616191904047976, "grad_norm": 46.25, "learning_rate": 5.1287395682749444e-06, "logits/chosen": -1.798828125, "logits/rejected": -1.7734375, "logps/chosen": -1304.0, "logps/rejected": -1418.0, "loss": 0.5908203125, "nll_loss": 1.853515625, "rewards/accuracies": 1.0, "rewards/chosen": 5.6015625, "rewards/margins": 10.40625, "rewards/rejected": -4.8046875, "step": 254 }, { "epoch": 0.7646176911544228, "grad_norm": 24.375, "learning_rate": 5.0965593450108495e-06, "logits/chosen": -1.7265625, "logits/rejected": -1.7421875, "logps/chosen": -1091.5, "logps/rejected": -1184.0, "loss": 0.5869140625, "nll_loss": 2.044921875, "rewards/accuracies": 1.0, "rewards/chosen": 5.140625, "rewards/margins": 11.359375, "rewards/rejected": -6.2265625, "step": 255 }, { "epoch": 0.767616191904048, "grad_norm": 54.0, "learning_rate": 5.064375119969491e-06, "logits/chosen": -1.697265625, "logits/rejected": -1.69140625, "logps/chosen": -1405.625, "logps/rejected": -1509.25, "loss": 0.5546875, "nll_loss": 1.7880859375, "rewards/accuracies": 1.0, "rewards/chosen": 3.73046875, "rewards/margins": 16.265625, "rewards/rejected": -12.546875, "step": 256 }, { "epoch": 0.7706146926536732, "grad_norm": 28.125, "learning_rate": 5.03218822698448e-06, "logits/chosen": -1.68359375, "logits/rejected": -1.697265625, "logps/chosen": -1122.25, "logps/rejected": -1205.75, "loss": 0.5009765625, "nll_loss": 1.689453125, "rewards/accuracies": 1.0, "rewards/chosen": 4.6328125, "rewards/margins": 11.84375, "rewards/rejected": -7.1953125, "step": 257 }, { "epoch": 0.7736131934032984, "grad_norm": 8.125, "learning_rate": 5e-06, "logits/chosen": -1.62890625, "logits/rejected": -1.599609375, "logps/chosen": -1328.0, "logps/rejected": -1572.0, "loss": 0.47412109375, "nll_loss": 1.8125, "rewards/accuracies": 1.0, "rewards/chosen": 6.3984375, "rewards/margins": 16.59375, "rewards/rejected": -10.1875, "step": 258 }, { "epoch": 0.7766116941529235, "grad_norm": 8.75, "learning_rate": 4.967811773015521e-06, "logits/chosen": -1.716796875, "logits/rejected": -1.75390625, "logps/chosen": -1272.0, "logps/rejected": -1446.0, "loss": 0.5322265625, "nll_loss": 1.875, "rewards/accuracies": 1.0, "rewards/chosen": 4.734375, "rewards/margins": 11.875, "rewards/rejected": -7.140625, "step": 259 }, { "epoch": 0.7796101949025487, "grad_norm": 6.3125, "learning_rate": 4.93562488003051e-06, "logits/chosen": -1.529296875, "logits/rejected": -1.607421875, "logps/chosen": -881.125, "logps/rejected": -986.875, "loss": 0.49365234375, "nll_loss": 1.46142578125, "rewards/accuracies": 1.0, "rewards/chosen": 4.47265625, "rewards/margins": 10.2265625, "rewards/rejected": -5.75390625, "step": 260 }, { "epoch": 0.782608695652174, "grad_norm": 51.0, "learning_rate": 4.90344065498915e-06, "logits/chosen": -1.732421875, "logits/rejected": -1.71875, "logps/chosen": -1242.0, "logps/rejected": -1396.5, "loss": 0.6455078125, "nll_loss": 1.60498046875, "rewards/accuracies": 1.0, "rewards/chosen": 2.3125, "rewards/margins": 9.578125, "rewards/rejected": -7.265625, "step": 261 }, { "epoch": 0.7856071964017991, "grad_norm": 27.375, "learning_rate": 4.871260431725058e-06, "logits/chosen": -1.630859375, "logits/rejected": -1.72265625, "logps/chosen": -875.0, "logps/rejected": -1267.0, "loss": 0.5908203125, "nll_loss": 1.451171875, "rewards/accuracies": 1.0, "rewards/chosen": 4.4140625, "rewards/margins": 10.203125, "rewards/rejected": -5.7890625, "step": 262 }, { "epoch": 0.7886056971514243, "grad_norm": 36.5, "learning_rate": 4.8390855439059955e-06, "logits/chosen": -1.62890625, "logits/rejected": -1.705078125, "logps/chosen": -1325.0, "logps/rejected": -1452.0, "loss": 0.548828125, "nll_loss": 1.77734375, "rewards/accuracies": 1.0, "rewards/chosen": 4.28125, "rewards/margins": 10.6875, "rewards/rejected": -6.40625, "step": 263 }, { "epoch": 0.7916041979010495, "grad_norm": 50.0, "learning_rate": 4.806917324978608e-06, "logits/chosen": -1.697265625, "logits/rejected": -1.72265625, "logps/chosen": -1273.5, "logps/rejected": -1497.0, "loss": 0.6025390625, "nll_loss": 1.62890625, "rewards/accuracies": 1.0, "rewards/chosen": 5.3828125, "rewards/margins": 9.4609375, "rewards/rejected": -4.078125, "step": 264 }, { "epoch": 0.7946026986506747, "grad_norm": 48.75, "learning_rate": 4.774757108113156e-06, "logits/chosen": -1.736328125, "logits/rejected": -1.7421875, "logps/chosen": -1547.5, "logps/rejected": -1678.5, "loss": 0.58349609375, "nll_loss": 1.935546875, "rewards/accuracies": 1.0, "rewards/chosen": 4.75390625, "rewards/margins": 12.28125, "rewards/rejected": -7.53125, "step": 265 }, { "epoch": 0.7976011994002998, "grad_norm": 31.375, "learning_rate": 4.742606226148268e-06, "logits/chosen": -1.78125, "logits/rejected": -1.751953125, "logps/chosen": -777.0, "logps/rejected": -767.0, "loss": 0.6220703125, "nll_loss": 1.3740234375, "rewards/accuracies": 1.0, "rewards/chosen": 2.546875, "rewards/margins": 8.515625, "rewards/rejected": -5.9609375, "step": 266 }, { "epoch": 0.800599700149925, "grad_norm": 15.1875, "learning_rate": 4.710466011535695e-06, "logits/chosen": -1.62890625, "logits/rejected": -1.73828125, "logps/chosen": -1782.0, "logps/rejected": -1670.0, "loss": 0.56640625, "nll_loss": 2.0615234375, "rewards/accuracies": 1.0, "rewards/chosen": 4.65625, "rewards/margins": 12.34375, "rewards/rejected": -7.6875, "step": 267 }, { "epoch": 0.8035982008995503, "grad_norm": 18.0, "learning_rate": 4.678337796285093e-06, "logits/chosen": -1.728515625, "logits/rejected": -1.76953125, "logps/chosen": -1278.875, "logps/rejected": -1236.125, "loss": 0.59326171875, "nll_loss": 1.6357421875, "rewards/accuracies": 1.0, "rewards/chosen": 3.1875, "rewards/margins": 9.390625, "rewards/rejected": -6.1875, "step": 268 }, { "epoch": 0.8065967016491754, "grad_norm": 35.5, "learning_rate": 4.6462229119088234e-06, "logits/chosen": -1.85546875, "logits/rejected": -1.806640625, "logps/chosen": -1858.0, "logps/rejected": -1754.0, "loss": 0.703125, "nll_loss": 2.439453125, "rewards/accuracies": 1.0, "rewards/chosen": 5.3984375, "rewards/margins": 12.96875, "rewards/rejected": -7.578125, "step": 269 }, { "epoch": 0.8095952023988006, "grad_norm": 36.75, "learning_rate": 4.614122689366769e-06, "logits/chosen": -1.783203125, "logits/rejected": -1.78515625, "logps/chosen": -1732.0, "logps/rejected": -1936.0, "loss": 0.6005859375, "nll_loss": 2.103515625, "rewards/accuracies": 1.0, "rewards/chosen": 6.5234375, "rewards/margins": 12.0625, "rewards/rejected": -5.53125, "step": 270 }, { "epoch": 0.8125937031484258, "grad_norm": 48.0, "learning_rate": 4.582038459011165e-06, "logits/chosen": -1.65625, "logits/rejected": -1.6484375, "logps/chosen": -1510.0, "logps/rejected": -1506.0, "loss": 0.53466796875, "nll_loss": 1.7109375, "rewards/accuracies": 1.0, "rewards/chosen": 5.6015625, "rewards/margins": 14.71875, "rewards/rejected": -9.125, "step": 271 }, { "epoch": 0.815592203898051, "grad_norm": 22.25, "learning_rate": 4.549971550531474e-06, "logits/chosen": -1.70703125, "logits/rejected": -1.73046875, "logps/chosen": -760.1875, "logps/rejected": -847.0625, "loss": 0.55322265625, "nll_loss": 1.33154296875, "rewards/accuracies": 1.0, "rewards/chosen": 3.66796875, "rewards/margins": 8.203125, "rewards/rejected": -4.53125, "step": 272 }, { "epoch": 0.8185907046476761, "grad_norm": 37.0, "learning_rate": 4.51792329289928e-06, "logits/chosen": -1.712890625, "logits/rejected": -1.658203125, "logps/chosen": -1470.5, "logps/rejected": -1666.0, "loss": 0.5849609375, "nll_loss": 1.81640625, "rewards/accuracies": 1.0, "rewards/chosen": 5.328125, "rewards/margins": 11.5, "rewards/rejected": -6.171875, "step": 273 }, { "epoch": 0.8215892053973014, "grad_norm": 26.125, "learning_rate": 4.485895014313198e-06, "logits/chosen": -1.765625, "logits/rejected": -1.6953125, "logps/chosen": -1831.0, "logps/rejected": -1974.0, "loss": 0.572265625, "nll_loss": 2.09375, "rewards/accuracies": 1.0, "rewards/chosen": 5.578125, "rewards/margins": 14.0, "rewards/rejected": -8.421875, "step": 274 }, { "epoch": 0.8245877061469266, "grad_norm": 26.25, "learning_rate": 4.453888042143847e-06, "logits/chosen": -1.6953125, "logits/rejected": -1.80859375, "logps/chosen": -1425.0, "logps/rejected": -1620.0, "loss": 0.7119140625, "nll_loss": 2.078125, "rewards/accuracies": 1.0, "rewards/chosen": 4.9140625, "rewards/margins": 9.0078125, "rewards/rejected": -4.0859375, "step": 275 }, { "epoch": 0.8275862068965517, "grad_norm": 59.75, "learning_rate": 4.421903702878822e-06, "logits/chosen": -1.755859375, "logits/rejected": -1.806640625, "logps/chosen": -1453.0, "logps/rejected": -1662.0, "loss": 0.583984375, "nll_loss": 1.93359375, "rewards/accuracies": 1.0, "rewards/chosen": 8.359375, "rewards/margins": 14.78125, "rewards/rejected": -6.421875, "step": 276 }, { "epoch": 0.8305847076461769, "grad_norm": 15.1875, "learning_rate": 4.389943322067728e-06, "logits/chosen": -1.59375, "logits/rejected": -1.6015625, "logps/chosen": -503.75, "logps/rejected": -765.5, "loss": 0.2955322265625, "nll_loss": 0.731689453125, "rewards/accuracies": 1.0, "rewards/chosen": 3.2421875, "rewards/margins": 12.578125, "rewards/rejected": -9.3359375, "step": 277 }, { "epoch": 0.8335832083958021, "grad_norm": 35.75, "learning_rate": 4.358008224267245e-06, "logits/chosen": -1.595703125, "logits/rejected": -1.646484375, "logps/chosen": -1723.0, "logps/rejected": -1879.0, "loss": 0.5888671875, "nll_loss": 2.0234375, "rewards/accuracies": 1.0, "rewards/chosen": 5.890625, "rewards/margins": 12.3125, "rewards/rejected": -6.421875, "step": 278 }, { "epoch": 0.8365817091454273, "grad_norm": 9.5625, "learning_rate": 4.326099732986231e-06, "logits/chosen": -1.73046875, "logits/rejected": -1.72265625, "logps/chosen": -851.734375, "logps/rejected": -817.25, "loss": 0.5048828125, "nll_loss": 1.164306640625, "rewards/accuracies": 1.0, "rewards/chosen": 4.314453125, "rewards/margins": 8.859375, "rewards/rejected": -4.5546875, "step": 279 }, { "epoch": 0.8395802098950524, "grad_norm": 18.625, "learning_rate": 4.29421917063087e-06, "logits/chosen": -1.830078125, "logits/rejected": -1.822265625, "logps/chosen": -1700.0, "logps/rejected": -1900.0, "loss": 0.705078125, "nll_loss": 2.12890625, "rewards/accuracies": 1.0, "rewards/chosen": 4.640625, "rewards/margins": 11.4140625, "rewards/rejected": -6.7578125, "step": 280 }, { "epoch": 0.8425787106446777, "grad_norm": 61.25, "learning_rate": 4.262367858449867e-06, "logits/chosen": -1.62890625, "logits/rejected": -1.65625, "logps/chosen": -1511.0, "logps/rejected": -1962.0, "loss": 0.7255859375, "nll_loss": 2.3125, "rewards/accuracies": 1.0, "rewards/chosen": 6.0703125, "rewards/margins": 11.625, "rewards/rejected": -5.5625, "step": 281 }, { "epoch": 0.8455772113943029, "grad_norm": 24.5, "learning_rate": 4.230547116479691e-06, "logits/chosen": -1.748046875, "logits/rejected": -1.76953125, "logps/chosen": -1011.6875, "logps/rejected": -1073.25, "loss": 0.432861328125, "nll_loss": 1.27880859375, "rewards/accuracies": 1.0, "rewards/chosen": 5.3984375, "rewards/margins": 10.96875, "rewards/rejected": -5.5859375, "step": 282 }, { "epoch": 0.848575712143928, "grad_norm": 42.25, "learning_rate": 4.1987582634898724e-06, "logits/chosen": -1.681640625, "logits/rejected": -1.62890625, "logps/chosen": -1221.75, "logps/rejected": -1297.0, "loss": 0.474365234375, "nll_loss": 1.327392578125, "rewards/accuracies": 1.0, "rewards/chosen": 4.66015625, "rewards/margins": 13.53125, "rewards/rejected": -8.859375, "step": 283 }, { "epoch": 0.8515742128935532, "grad_norm": 55.5, "learning_rate": 4.167002616928341e-06, "logits/chosen": -1.671875, "logits/rejected": -1.662109375, "logps/chosen": -831.75, "logps/rejected": -880.0, "loss": 0.5908203125, "nll_loss": 1.43115234375, "rewards/accuracies": 1.0, "rewards/chosen": 2.638671875, "rewards/margins": 8.984375, "rewards/rejected": -6.359375, "step": 284 }, { "epoch": 0.8545727136431784, "grad_norm": 7.25, "learning_rate": 4.135281492866826e-06, "logits/chosen": -1.73046875, "logits/rejected": -1.755859375, "logps/chosen": -1302.0, "logps/rejected": -1766.0, "loss": 0.55322265625, "nll_loss": 2.083984375, "rewards/accuracies": 1.0, "rewards/chosen": 5.90625, "rewards/margins": 14.296875, "rewards/rejected": -8.40625, "step": 285 }, { "epoch": 0.8575712143928036, "grad_norm": 48.25, "learning_rate": 4.103596205946323e-06, "logits/chosen": -1.623046875, "logits/rejected": -1.681640625, "logps/chosen": -1045.6875, "logps/rejected": -1236.75, "loss": 0.625, "nll_loss": 1.55859375, "rewards/accuracies": 1.0, "rewards/chosen": 3.359375, "rewards/margins": 9.03125, "rewards/rejected": -5.671875, "step": 286 }, { "epoch": 0.8605697151424287, "grad_norm": 32.0, "learning_rate": 4.0719480693225964e-06, "logits/chosen": -1.6328125, "logits/rejected": -1.71484375, "logps/chosen": -1069.75, "logps/rejected": -976.25, "loss": 0.6630859375, "nll_loss": 1.48388671875, "rewards/accuracies": 1.0, "rewards/chosen": 5.046875, "rewards/margins": 8.68359375, "rewards/rejected": -3.63671875, "step": 287 }, { "epoch": 0.863568215892054, "grad_norm": 13.8125, "learning_rate": 4.040338394611772e-06, "logits/chosen": -1.748046875, "logits/rejected": -1.71875, "logps/chosen": -1143.25, "logps/rejected": -1241.0, "loss": 0.45947265625, "nll_loss": 1.4365234375, "rewards/accuracies": 1.0, "rewards/chosen": 3.59375, "rewards/margins": 11.484375, "rewards/rejected": -7.90625, "step": 288 }, { "epoch": 0.8665667166416792, "grad_norm": 17.375, "learning_rate": 4.0087684918359646e-06, "logits/chosen": -1.64453125, "logits/rejected": -1.73046875, "logps/chosen": -1401.0, "logps/rejected": -1714.0, "loss": 0.50048828125, "nll_loss": 1.7919921875, "rewards/accuracies": 1.0, "rewards/chosen": 6.515625, "rewards/margins": 14.984375, "rewards/rejected": -8.46875, "step": 289 }, { "epoch": 0.8695652173913043, "grad_norm": 11.3125, "learning_rate": 3.977239669368998e-06, "logits/chosen": -1.810546875, "logits/rejected": -1.708984375, "logps/chosen": -1070.5, "logps/rejected": -1133.0, "loss": 0.50244140625, "nll_loss": 1.74560546875, "rewards/accuracies": 1.0, "rewards/chosen": 5.09375, "rewards/margins": 15.515625, "rewards/rejected": -10.390625, "step": 290 }, { "epoch": 0.8725637181409296, "grad_norm": 40.5, "learning_rate": 3.945753233882168e-06, "logits/chosen": -1.66015625, "logits/rejected": -1.689453125, "logps/chosen": -962.25, "logps/rejected": -1140.25, "loss": 0.4775390625, "nll_loss": 1.3759765625, "rewards/accuracies": 1.0, "rewards/chosen": 3.951171875, "rewards/margins": 10.0390625, "rewards/rejected": -6.0859375, "step": 291 }, { "epoch": 0.8755622188905547, "grad_norm": 40.25, "learning_rate": 3.9143104902901085e-06, "logits/chosen": -1.69921875, "logits/rejected": -1.69140625, "logps/chosen": -1700.0, "logps/rejected": -1820.0, "loss": 0.7255859375, "nll_loss": 2.208984375, "rewards/accuracies": 1.0, "rewards/chosen": 5.6328125, "rewards/margins": 11.265625, "rewards/rejected": -5.640625, "step": 292 }, { "epoch": 0.8785607196401799, "grad_norm": 27.25, "learning_rate": 3.882912741696688e-06, "logits/chosen": -1.703125, "logits/rejected": -1.6953125, "logps/chosen": -1748.0, "logps/rejected": -1916.0, "loss": 0.673828125, "nll_loss": 2.037109375, "rewards/accuracies": 1.0, "rewards/chosen": 5.1875, "rewards/margins": 12.515625, "rewards/rejected": -7.328125, "step": 293 }, { "epoch": 0.881559220389805, "grad_norm": 11.1875, "learning_rate": 3.851561289341023e-06, "logits/chosen": -1.73828125, "logits/rejected": -1.74609375, "logps/chosen": -1832.0, "logps/rejected": -1968.0, "loss": 0.7021484375, "nll_loss": 2.150390625, "rewards/accuracies": 1.0, "rewards/chosen": 6.390625, "rewards/margins": 13.078125, "rewards/rejected": -6.6875, "step": 294 }, { "epoch": 0.8845577211394303, "grad_norm": 52.5, "learning_rate": 3.820257432543539e-06, "logits/chosen": -1.744140625, "logits/rejected": -1.71484375, "logps/chosen": -999.5, "logps/rejected": -1240.25, "loss": 0.666015625, "nll_loss": 1.64990234375, "rewards/accuracies": 1.0, "rewards/chosen": 5.6015625, "rewards/margins": 11.1015625, "rewards/rejected": -5.484375, "step": 295 }, { "epoch": 0.8875562218890555, "grad_norm": 15.125, "learning_rate": 3.789002468652121e-06, "logits/chosen": -1.662109375, "logits/rejected": -1.669921875, "logps/chosen": -1266.5, "logps/rejected": -1460.0, "loss": 0.5751953125, "nll_loss": 1.904296875, "rewards/accuracies": 1.0, "rewards/chosen": 5.0859375, "rewards/margins": 11.984375, "rewards/rejected": -6.8984375, "step": 296 }, { "epoch": 0.8905547226386806, "grad_norm": 14.375, "learning_rate": 3.7577976929883608e-06, "logits/chosen": -1.640625, "logits/rejected": -1.5625, "logps/chosen": -1092.25, "logps/rejected": -1052.5, "loss": 0.61572265625, "nll_loss": 1.72509765625, "rewards/accuracies": 1.0, "rewards/chosen": 4.234375, "rewards/margins": 11.65625, "rewards/rejected": -7.421875, "step": 297 }, { "epoch": 0.8935532233883059, "grad_norm": 45.75, "learning_rate": 3.726644398793857e-06, "logits/chosen": -1.822265625, "logits/rejected": -1.822265625, "logps/chosen": -1571.0, "logps/rejected": -1712.0, "loss": 0.7763671875, "nll_loss": 1.75, "rewards/accuracies": 1.0, "rewards/chosen": 2.98828125, "rewards/margins": 8.859375, "rewards/rejected": -5.8828125, "step": 298 }, { "epoch": 0.896551724137931, "grad_norm": 8.375, "learning_rate": 3.695543877176626e-06, "logits/chosen": -1.75, "logits/rejected": -1.734375, "logps/chosen": -1709.0, "logps/rejected": -1790.0, "loss": 0.5283203125, "nll_loss": 1.96484375, "rewards/accuracies": 1.0, "rewards/chosen": 5.9921875, "rewards/margins": 12.765625, "rewards/rejected": -6.7734375, "step": 299 }, { "epoch": 0.8995502248875562, "grad_norm": 11.8125, "learning_rate": 3.6644974170575907e-06, "logits/chosen": -1.69921875, "logits/rejected": -1.71484375, "logps/chosen": -919.75, "logps/rejected": -874.5, "loss": 0.53564453125, "nll_loss": 1.59619140625, "rewards/accuracies": 1.0, "rewards/chosen": 3.76953125, "rewards/margins": 9.625, "rewards/rejected": -5.8671875, "step": 300 }, { "epoch": 0.9025487256371814, "grad_norm": 16.75, "learning_rate": 3.6335063051171725e-06, "logits/chosen": -1.83203125, "logits/rejected": -1.814453125, "logps/chosen": -1432.0, "logps/rejected": -1301.0, "loss": 0.63037109375, "nll_loss": 1.95703125, "rewards/accuracies": 1.0, "rewards/chosen": 4.0625, "rewards/margins": 10.671875, "rewards/rejected": -6.609375, "step": 301 }, { "epoch": 0.9055472263868066, "grad_norm": 25.5, "learning_rate": 3.6025718257419532e-06, "logits/chosen": -1.662109375, "logits/rejected": -1.68359375, "logps/chosen": -844.375, "logps/rejected": -1091.25, "loss": 0.654296875, "nll_loss": 1.51171875, "rewards/accuracies": 1.0, "rewards/chosen": 3.984375, "rewards/margins": 8.92578125, "rewards/rejected": -4.94140625, "step": 302 }, { "epoch": 0.9085457271364318, "grad_norm": 35.5, "learning_rate": 3.5716952609714517e-06, "logits/chosen": -1.755859375, "logits/rejected": -1.716796875, "logps/chosen": -1732.0, "logps/rejected": -1798.0, "loss": 0.5380859375, "nll_loss": 1.880859375, "rewards/accuracies": 1.0, "rewards/chosen": 5.8125, "rewards/margins": 13.328125, "rewards/rejected": -7.515625, "step": 303 }, { "epoch": 0.9115442278860569, "grad_norm": 15.0, "learning_rate": 3.540877890444989e-06, "logits/chosen": -1.646484375, "logits/rejected": -1.671875, "logps/chosen": -941.125, "logps/rejected": -1159.0, "loss": 0.361328125, "nll_loss": 1.1339111328125, "rewards/accuracies": 1.0, "rewards/chosen": 4.390625, "rewards/margins": 16.875, "rewards/rejected": -12.46875, "step": 304 }, { "epoch": 0.9145427286356822, "grad_norm": 25.0, "learning_rate": 3.5101209913486655e-06, "logits/chosen": -1.75390625, "logits/rejected": -1.73046875, "logps/chosen": -1143.359375, "logps/rejected": -1245.375, "loss": 0.48388671875, "nll_loss": 1.440185546875, "rewards/accuracies": 1.0, "rewards/chosen": 3.66796875, "rewards/margins": 8.578125, "rewards/rejected": -4.9140625, "step": 305 }, { "epoch": 0.9175412293853074, "grad_norm": 28.875, "learning_rate": 3.4794258383624115e-06, "logits/chosen": -1.734375, "logits/rejected": -1.767578125, "logps/chosen": -1412.75, "logps/rejected": -1507.0, "loss": 0.5478515625, "nll_loss": 1.734375, "rewards/accuracies": 1.0, "rewards/chosen": 4.6796875, "rewards/margins": 9.5, "rewards/rejected": -4.8203125, "step": 306 }, { "epoch": 0.9205397301349325, "grad_norm": 18.25, "learning_rate": 3.448793703607175e-06, "logits/chosen": -1.669921875, "logits/rejected": -1.6953125, "logps/chosen": -1309.0, "logps/rejected": -1461.0, "loss": 0.537109375, "nll_loss": 1.5556640625, "rewards/accuracies": 1.0, "rewards/chosen": 4.375, "rewards/margins": 11.2578125, "rewards/rejected": -6.8828125, "step": 307 }, { "epoch": 0.9235382308845578, "grad_norm": 25.75, "learning_rate": 3.4182258565921933e-06, "logits/chosen": -1.78515625, "logits/rejected": -1.76171875, "logps/chosen": -1364.0, "logps/rejected": -1426.0, "loss": 0.55078125, "nll_loss": 1.841796875, "rewards/accuracies": 1.0, "rewards/chosen": 4.20703125, "rewards/margins": 10.15625, "rewards/rejected": -5.9609375, "step": 308 }, { "epoch": 0.9265367316341829, "grad_norm": 9.9375, "learning_rate": 3.3877235641623797e-06, "logits/chosen": -1.642578125, "logits/rejected": -1.66015625, "logps/chosen": -496.75, "logps/rejected": -698.5, "loss": 0.4853515625, "nll_loss": 0.99365234375, "rewards/accuracies": 1.0, "rewards/chosen": 2.99609375, "rewards/margins": 8.07421875, "rewards/rejected": -5.078125, "step": 309 }, { "epoch": 0.9295352323838081, "grad_norm": 7.28125, "learning_rate": 3.3572880904458267e-06, "logits/chosen": -1.607421875, "logits/rejected": -1.685546875, "logps/chosen": -1492.25, "logps/rejected": -1327.75, "loss": 0.57421875, "nll_loss": 2.006103515625, "rewards/accuracies": 1.0, "rewards/chosen": 4.54296875, "rewards/margins": 13.5859375, "rewards/rejected": -9.03125, "step": 310 }, { "epoch": 0.9325337331334332, "grad_norm": 31.75, "learning_rate": 3.32692069680141e-06, "logits/chosen": -1.744140625, "logits/rejected": -1.826171875, "logps/chosen": -1816.5, "logps/rejected": -1791.0, "loss": 0.63330078125, "nll_loss": 2.265625, "rewards/accuracies": 1.0, "rewards/chosen": 6.15625, "rewards/margins": 13.453125, "rewards/rejected": -7.296875, "step": 311 }, { "epoch": 0.9355322338830585, "grad_norm": 61.25, "learning_rate": 3.2966226417665125e-06, "logits/chosen": -1.744140625, "logits/rejected": -1.759765625, "logps/chosen": -1076.0, "logps/rejected": -1744.0, "loss": 0.568359375, "nll_loss": 1.85546875, "rewards/accuracies": 1.0, "rewards/chosen": 6.1875, "rewards/margins": 13.625, "rewards/rejected": -7.4296875, "step": 312 }, { "epoch": 0.9385307346326837, "grad_norm": 11.875, "learning_rate": 3.2663951810048683e-06, "logits/chosen": -1.703125, "logits/rejected": -1.75, "logps/chosen": -1554.0, "logps/rejected": -1578.0, "loss": 0.5556640625, "nll_loss": 2.0390625, "rewards/accuracies": 1.0, "rewards/chosen": 5.4375, "rewards/margins": 12.328125, "rewards/rejected": -6.890625, "step": 313 }, { "epoch": 0.9415292353823088, "grad_norm": 6.75, "learning_rate": 3.236239567254526e-06, "logits/chosen": -1.6796875, "logits/rejected": -1.6875, "logps/chosen": -1006.5, "logps/rejected": -1101.5, "loss": 0.395263671875, "nll_loss": 1.31591796875, "rewards/accuracies": 1.0, "rewards/chosen": 5.1171875, "rewards/margins": 14.734375, "rewards/rejected": -9.609375, "step": 314 }, { "epoch": 0.9445277361319341, "grad_norm": 9.25, "learning_rate": 3.206157050275927e-06, "logits/chosen": -1.791015625, "logits/rejected": -1.71875, "logps/chosen": -1070.0, "logps/rejected": -1108.0, "loss": 0.4912109375, "nll_loss": 1.67578125, "rewards/accuracies": 1.0, "rewards/chosen": 4.4296875, "rewards/margins": 11.15625, "rewards/rejected": -6.71875, "step": 315 }, { "epoch": 0.9475262368815592, "grad_norm": 15.1875, "learning_rate": 3.176148876800109e-06, "logits/chosen": -1.8203125, "logits/rejected": -1.76171875, "logps/chosen": -1850.0, "logps/rejected": -1926.0, "loss": 0.560546875, "nll_loss": 2.033203125, "rewards/accuracies": 1.0, "rewards/chosen": 5.4765625, "rewards/margins": 12.4375, "rewards/rejected": -6.953125, "step": 316 }, { "epoch": 0.9505247376311844, "grad_norm": 27.25, "learning_rate": 3.1462162904770376e-06, "logits/chosen": -1.650390625, "logits/rejected": -1.662109375, "logps/chosen": -1740.5, "logps/rejected": -1696.25, "loss": 0.5810546875, "nll_loss": 1.8818359375, "rewards/accuracies": 1.0, "rewards/chosen": 4.421875, "rewards/margins": 10.3828125, "rewards/rejected": -5.9609375, "step": 317 }, { "epoch": 0.9535232383808095, "grad_norm": 6.84375, "learning_rate": 3.116360531824074e-06, "logits/chosen": -1.505859375, "logits/rejected": -1.517578125, "logps/chosen": -302.40625, "logps/rejected": -462.75, "loss": 0.2587890625, "nll_loss": 0.62255859375, "rewards/accuracies": 1.0, "rewards/chosen": 3.3359375, "rewards/margins": 13.15625, "rewards/rejected": -9.8359375, "step": 318 }, { "epoch": 0.9565217391304348, "grad_norm": 7.46875, "learning_rate": 3.0865828381745515e-06, "logits/chosen": -1.712890625, "logits/rejected": -1.7265625, "logps/chosen": -1206.0, "logps/rejected": -1292.0, "loss": 0.5078125, "nll_loss": 1.751953125, "rewards/accuracies": 1.0, "rewards/chosen": 5.6171875, "rewards/margins": 12.65625, "rewards/rejected": -7.03125, "step": 319 }, { "epoch": 0.95952023988006, "grad_norm": 23.625, "learning_rate": 3.056884443626499e-06, "logits/chosen": -1.765625, "logits/rejected": -1.76953125, "logps/chosen": -1606.0, "logps/rejected": -1787.0, "loss": 0.515625, "nll_loss": 1.6953125, "rewards/accuracies": 1.0, "rewards/chosen": 4.546875, "rewards/margins": 10.8515625, "rewards/rejected": -6.3046875, "step": 320 }, { "epoch": 0.9625187406296851, "grad_norm": 15.4375, "learning_rate": 3.027266578991497e-06, "logits/chosen": -1.71875, "logits/rejected": -1.732421875, "logps/chosen": -1544.5, "logps/rejected": -1706.5, "loss": 0.4716796875, "nll_loss": 1.61083984375, "rewards/accuracies": 1.0, "rewards/chosen": 4.8671875, "rewards/margins": 11.8125, "rewards/rejected": -6.9453125, "step": 321 }, { "epoch": 0.9655172413793104, "grad_norm": 18.625, "learning_rate": 2.997730471743667e-06, "logits/chosen": -1.6328125, "logits/rejected": -1.650390625, "logps/chosen": -1250.75, "logps/rejected": -1418.0, "loss": 0.44970703125, "nll_loss": 1.46240234375, "rewards/accuracies": 1.0, "rewards/chosen": 4.78515625, "rewards/margins": 13.453125, "rewards/rejected": -8.6796875, "step": 322 }, { "epoch": 0.9685157421289355, "grad_norm": 17.625, "learning_rate": 2.9682773459688087e-06, "logits/chosen": -1.638671875, "logits/rejected": -1.642578125, "logps/chosen": -1154.0, "logps/rejected": -1335.0, "loss": 0.51171875, "nll_loss": 1.658203125, "rewards/accuracies": 1.0, "rewards/chosen": 4.484375, "rewards/margins": 11.859375, "rewards/rejected": -7.390625, "step": 323 }, { "epoch": 0.9715142428785607, "grad_norm": 10.6875, "learning_rate": 2.9389084223136523e-06, "logits/chosen": -1.6875, "logits/rejected": -1.705078125, "logps/chosen": -1032.5, "logps/rejected": -1095.0, "loss": 0.4462890625, "nll_loss": 1.50390625, "rewards/accuracies": 1.0, "rewards/chosen": 4.765625, "rewards/margins": 14.046875, "rewards/rejected": -9.28125, "step": 324 }, { "epoch": 0.974512743628186, "grad_norm": 12.3125, "learning_rate": 2.9096249179352833e-06, "logits/chosen": -1.49609375, "logits/rejected": -1.5703125, "logps/chosen": -1409.5, "logps/rejected": -1573.0, "loss": 0.5673828125, "nll_loss": 2.056640625, "rewards/accuracies": 1.0, "rewards/chosen": 5.6328125, "rewards/margins": 14.546875, "rewards/rejected": -8.9296875, "step": 325 }, { "epoch": 0.9775112443778111, "grad_norm": 10.8125, "learning_rate": 2.880428046450697e-06, "logits/chosen": -1.591796875, "logits/rejected": -1.658203125, "logps/chosen": -1150.0, "logps/rejected": -1345.0, "loss": 0.45703125, "nll_loss": 1.662109375, "rewards/accuracies": 1.0, "rewards/chosen": 5.7734375, "rewards/margins": 13.71875, "rewards/rejected": -7.9375, "step": 326 }, { "epoch": 0.9805097451274363, "grad_norm": 12.4375, "learning_rate": 2.8513190178865004e-06, "logits/chosen": -1.646484375, "logits/rejected": -1.609375, "logps/chosen": -724.75, "logps/rejected": -562.5, "loss": 0.37158203125, "nll_loss": 1.12841796875, "rewards/accuracies": 1.0, "rewards/chosen": 3.72265625, "rewards/margins": 10.484375, "rewards/rejected": -6.7734375, "step": 327 }, { "epoch": 0.9835082458770614, "grad_norm": 21.125, "learning_rate": 2.822299038628762e-06, "logits/chosen": -1.615234375, "logits/rejected": -1.689453125, "logps/chosen": -1058.0, "logps/rejected": -1222.25, "loss": 0.490966796875, "nll_loss": 1.5361328125, "rewards/accuracies": 1.0, "rewards/chosen": 4.78515625, "rewards/margins": 10.96875, "rewards/rejected": -6.1796875, "step": 328 }, { "epoch": 0.9865067466266867, "grad_norm": 37.25, "learning_rate": 2.793369311373021e-06, "logits/chosen": -1.708984375, "logits/rejected": -1.705078125, "logps/chosen": -1668.0, "logps/rejected": -1784.0, "loss": 0.59375, "nll_loss": 1.796875, "rewards/accuracies": 1.0, "rewards/chosen": 5.3125, "rewards/margins": 12.0, "rewards/rejected": -6.6875, "step": 329 }, { "epoch": 0.9895052473763118, "grad_norm": 58.75, "learning_rate": 2.7645310350744296e-06, "logits/chosen": -1.818359375, "logits/rejected": -1.806640625, "logps/chosen": -1686.0, "logps/rejected": -1610.0, "loss": 0.609375, "nll_loss": 1.896484375, "rewards/accuracies": 1.0, "rewards/chosen": 3.359375, "rewards/margins": 10.609375, "rewards/rejected": -7.25, "step": 330 }, { "epoch": 0.992503748125937, "grad_norm": 7.59375, "learning_rate": 2.7357854048980893e-06, "logits/chosen": -1.7265625, "logits/rejected": -1.73046875, "logps/chosen": -1509.0, "logps/rejected": -1515.0, "loss": 0.568359375, "nll_loss": 2.03125, "rewards/accuracies": 1.0, "rewards/chosen": 5.671875, "rewards/margins": 12.734375, "rewards/rejected": -7.0625, "step": 331 }, { "epoch": 0.9955022488755623, "grad_norm": 10.125, "learning_rate": 2.7071336121694856e-06, "logits/chosen": -1.638671875, "logits/rejected": -1.654296875, "logps/chosen": -1256.5, "logps/rejected": -1523.0, "loss": 0.47998046875, "nll_loss": 1.6484375, "rewards/accuracies": 1.0, "rewards/chosen": 6.1484375, "rewards/margins": 12.953125, "rewards/rejected": -6.8125, "step": 332 }, { "epoch": 0.9985007496251874, "grad_norm": 23.25, "learning_rate": 2.6785768443251437e-06, "logits/chosen": -1.70703125, "logits/rejected": -1.705078125, "logps/chosen": -1104.875, "logps/rejected": -1212.5, "loss": 0.5185546875, "nll_loss": 1.36279296875, "rewards/accuracies": 1.0, "rewards/chosen": 4.06640625, "rewards/margins": 11.3046875, "rewards/rejected": -7.234375, "step": 333 }, { "epoch": 1.0, "grad_norm": 44.5, "learning_rate": 2.6501162848634023e-06, "logits/chosen": -1.80859375, "logits/rejected": -1.859375, "logps/chosen": -2200.0, "logps/rejected": -2188.0, "loss": 0.654296875, "nll_loss": 2.28125, "rewards/accuracies": 1.0, "rewards/chosen": 5.25, "rewards/margins": 12.75, "rewards/rejected": -7.5, "step": 334 }, { "epoch": 1.0029985007496252, "grad_norm": 4.40625, "learning_rate": 2.621753113295361e-06, "logits/chosen": -1.673828125, "logits/rejected": -1.642578125, "logps/chosen": -1009.0, "logps/rejected": -1076.0, "loss": 0.47216796875, "nll_loss": 1.6875, "rewards/accuracies": 1.0, "rewards/chosen": 5.3359375, "rewards/margins": 12.15625, "rewards/rejected": -6.8203125, "step": 335 }, { "epoch": 1.0059970014992503, "grad_norm": 3.359375, "learning_rate": 2.5934885050960183e-06, "logits/chosen": -1.732421875, "logits/rejected": -1.705078125, "logps/chosen": -1638.0, "logps/rejected": -1790.0, "loss": 0.6123046875, "nll_loss": 2.26171875, "rewards/accuracies": 1.0, "rewards/chosen": 6.890625, "rewards/margins": 14.515625, "rewards/rejected": -7.640625, "step": 336 }, { "epoch": 1.0089955022488755, "grad_norm": 3.09375, "learning_rate": 2.565323631655532e-06, "logits/chosen": -1.77734375, "logits/rejected": -1.732421875, "logps/chosen": -1436.0, "logps/rejected": -1691.0, "loss": 0.513671875, "nll_loss": 1.8828125, "rewards/accuracies": 1.0, "rewards/chosen": 8.375, "rewards/margins": 15.96875, "rewards/rejected": -7.578125, "step": 337 }, { "epoch": 1.0119940029985008, "grad_norm": 5.0625, "learning_rate": 2.537259660230679e-06, "logits/chosen": -1.634765625, "logits/rejected": -1.64453125, "logps/chosen": -803.0, "logps/rejected": -966.0, "loss": 0.39501953125, "nll_loss": 1.34765625, "rewards/accuracies": 1.0, "rewards/chosen": 5.171875, "rewards/margins": 13.359375, "rewards/rejected": -8.1875, "step": 338 }, { "epoch": 1.014992503748126, "grad_norm": 8.1875, "learning_rate": 2.5092977538964887e-06, "logits/chosen": -1.744140625, "logits/rejected": -1.72265625, "logps/chosen": -1679.0, "logps/rejected": -1773.0, "loss": 0.5478515625, "nll_loss": 2.095703125, "rewards/accuracies": 1.0, "rewards/chosen": 7.6953125, "rewards/margins": 18.03125, "rewards/rejected": -10.34375, "step": 339 } ], "logging_steps": 1, "max_steps": 501, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }