| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.014992503748126, | |
| "eval_steps": 500, | |
| "global_step": 339, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0029985007496251873, | |
| "grad_norm": 98.0, | |
| "learning_rate": 0.0, | |
| "logits/chosen": -1.517578125, | |
| "logits/rejected": -1.533203125, | |
| "logps/chosen": -767.25, | |
| "logps/rejected": -836.75, | |
| "loss": 1.3203125, | |
| "nll_loss": 1.28076171875, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.005997001499250375, | |
| "grad_norm": 119.0, | |
| "learning_rate": 7.692307692307694e-07, | |
| "logits/chosen": -1.689453125, | |
| "logits/rejected": -1.634765625, | |
| "logps/chosen": -1800.0, | |
| "logps/rejected": -1910.0, | |
| "loss": 1.49609375, | |
| "nll_loss": 1.9921875, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.008995502248875561, | |
| "grad_norm": 84.5, | |
| "learning_rate": 1.5384615384615387e-06, | |
| "logits/chosen": -1.71484375, | |
| "logits/rejected": -1.708984375, | |
| "logps/chosen": -1291.5, | |
| "logps/rejected": -1344.75, | |
| "loss": 1.4296875, | |
| "nll_loss": 1.6005859375, | |
| "rewards/accuracies": 0.25, | |
| "rewards/chosen": -0.0390625, | |
| "rewards/margins": -0.1875, | |
| "rewards/rejected": 0.1484375, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.01199400299850075, | |
| "grad_norm": 52.0, | |
| "learning_rate": 2.307692307692308e-06, | |
| "logits/chosen": -1.529296875, | |
| "logits/rejected": -1.54296875, | |
| "logps/chosen": -613.5, | |
| "logps/rejected": -454.0, | |
| "loss": 1.189453125, | |
| "nll_loss": 0.8017578125, | |
| "rewards/accuracies": 0.375, | |
| "rewards/chosen": 0.109375, | |
| "rewards/margins": 0.078125, | |
| "rewards/rejected": 0.03125, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.014992503748125937, | |
| "grad_norm": 84.0, | |
| "learning_rate": 3.0769230769230774e-06, | |
| "logits/chosen": -1.48828125, | |
| "logits/rejected": -1.607421875, | |
| "logps/chosen": -729.25, | |
| "logps/rejected": -897.5, | |
| "loss": 1.251953125, | |
| "nll_loss": 1.33984375, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.130859375, | |
| "rewards/margins": 0.451171875, | |
| "rewards/rejected": -0.3203125, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.017991004497751123, | |
| "grad_norm": 83.5, | |
| "learning_rate": 3.846153846153847e-06, | |
| "logits/chosen": -1.712890625, | |
| "logits/rejected": -1.6484375, | |
| "logps/chosen": -1176.0, | |
| "logps/rejected": -1174.0, | |
| "loss": 1.498046875, | |
| "nll_loss": 1.806640625, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.1484375, | |
| "rewards/margins": -0.4453125, | |
| "rewards/rejected": 0.59375, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.020989505247376312, | |
| "grad_norm": 110.0, | |
| "learning_rate": 4.615384615384616e-06, | |
| "logits/chosen": -1.751953125, | |
| "logits/rejected": -1.681640625, | |
| "logps/chosen": -1940.0, | |
| "logps/rejected": -1952.0, | |
| "loss": 1.505859375, | |
| "nll_loss": 2.5390625, | |
| "rewards/accuracies": 0.375, | |
| "rewards/chosen": 0.0078125, | |
| "rewards/margins": 0.7763671875, | |
| "rewards/rejected": -0.7685546875, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.0239880059970015, | |
| "grad_norm": 113.5, | |
| "learning_rate": 5.384615384615385e-06, | |
| "logits/chosen": -1.564453125, | |
| "logits/rejected": -1.61328125, | |
| "logps/chosen": -1705.0, | |
| "logps/rejected": -1841.0, | |
| "loss": 1.314453125, | |
| "nll_loss": 1.87841796875, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.46875, | |
| "rewards/margins": 1.078125, | |
| "rewards/rejected": -0.609375, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.026986506746626688, | |
| "grad_norm": 103.0, | |
| "learning_rate": 6.153846153846155e-06, | |
| "logits/chosen": -1.748046875, | |
| "logits/rejected": -1.69140625, | |
| "logps/chosen": -1655.5, | |
| "logps/rejected": -1663.75, | |
| "loss": 1.427734375, | |
| "nll_loss": 1.86572265625, | |
| "rewards/accuracies": 0.375, | |
| "rewards/chosen": 0.0703125, | |
| "rewards/margins": 0.2421875, | |
| "rewards/rejected": -0.171875, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.029985007496251874, | |
| "grad_norm": 77.0, | |
| "learning_rate": 6.923076923076923e-06, | |
| "logits/chosen": -1.65625, | |
| "logits/rejected": -1.619140625, | |
| "logps/chosen": -1045.5, | |
| "logps/rejected": -1078.375, | |
| "loss": 1.365234375, | |
| "nll_loss": 1.49609375, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.26171875, | |
| "rewards/margins": 0.02734375, | |
| "rewards/rejected": 0.234375, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.03298350824587706, | |
| "grad_norm": 87.5, | |
| "learning_rate": 7.692307692307694e-06, | |
| "logits/chosen": -1.578125, | |
| "logits/rejected": -1.595703125, | |
| "logps/chosen": -1101.0, | |
| "logps/rejected": -1194.5, | |
| "loss": 1.48828125, | |
| "nll_loss": 1.697265625, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.328125, | |
| "rewards/margins": -0.4072265625, | |
| "rewards/rejected": 0.7353515625, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.035982008995502246, | |
| "grad_norm": 92.5, | |
| "learning_rate": 8.461538461538462e-06, | |
| "logits/chosen": -1.67578125, | |
| "logits/rejected": -1.646484375, | |
| "logps/chosen": -871.5, | |
| "logps/rejected": -1011.0, | |
| "loss": 1.291015625, | |
| "nll_loss": 1.615234375, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.744140625, | |
| "rewards/margins": 0.650390625, | |
| "rewards/rejected": 0.09375, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.038980509745127435, | |
| "grad_norm": 66.0, | |
| "learning_rate": 9.230769230769232e-06, | |
| "logits/chosen": -1.681640625, | |
| "logits/rejected": -1.6484375, | |
| "logps/chosen": -1007.625, | |
| "logps/rejected": -1010.53125, | |
| "loss": 1.34375, | |
| "nll_loss": 1.3955078125, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.408203125, | |
| "rewards/margins": -0.0322265625, | |
| "rewards/rejected": 0.4404296875, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.041979010494752625, | |
| "grad_norm": 79.5, | |
| "learning_rate": 1e-05, | |
| "logits/chosen": -1.744140625, | |
| "logits/rejected": -1.69921875, | |
| "logps/chosen": -1097.0, | |
| "logps/rejected": -1043.25, | |
| "loss": 1.30859375, | |
| "nll_loss": 1.47265625, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.62890625, | |
| "rewards/margins": 0.30859375, | |
| "rewards/rejected": 0.3203125, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.044977511244377814, | |
| "grad_norm": 98.0, | |
| "learning_rate": 9.999896390730872e-06, | |
| "logits/chosen": -1.732421875, | |
| "logits/rejected": -1.666015625, | |
| "logps/chosen": -1499.0, | |
| "logps/rejected": -1722.0, | |
| "loss": 1.283203125, | |
| "nll_loss": 1.810546875, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 1.1875, | |
| "rewards/margins": 1.0, | |
| "rewards/rejected": 0.1875, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.047976011994003, | |
| "grad_norm": 104.5, | |
| "learning_rate": 9.99958556721744e-06, | |
| "logits/chosen": -1.6171875, | |
| "logits/rejected": -1.638671875, | |
| "logps/chosen": -1740.0, | |
| "logps/rejected": -1810.0, | |
| "loss": 1.314453125, | |
| "nll_loss": 2.267578125, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 1.13671875, | |
| "rewards/margins": 1.53125, | |
| "rewards/rejected": -0.39453125, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.050974512743628186, | |
| "grad_norm": 99.5, | |
| "learning_rate": 9.99906754234138e-06, | |
| "logits/chosen": -1.6953125, | |
| "logits/rejected": -1.662109375, | |
| "logps/chosen": -1371.75, | |
| "logps/rejected": -1211.375, | |
| "loss": 1.1796875, | |
| "nll_loss": 1.88818359375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.482421875, | |
| "rewards/margins": 1.986328125, | |
| "rewards/rejected": -1.50341796875, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.053973013493253376, | |
| "grad_norm": 84.0, | |
| "learning_rate": 9.998342337571566e-06, | |
| "logits/chosen": -1.689453125, | |
| "logits/rejected": -1.62890625, | |
| "logps/chosen": -1658.0, | |
| "logps/rejected": -1794.0, | |
| "loss": 1.21484375, | |
| "nll_loss": 2.17578125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.765625, | |
| "rewards/margins": 2.39453125, | |
| "rewards/rejected": -1.62890625, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.05697151424287856, | |
| "grad_norm": 98.0, | |
| "learning_rate": 9.997409982963173e-06, | |
| "logits/chosen": -1.658203125, | |
| "logits/rejected": -1.62109375, | |
| "logps/chosen": -1526.0, | |
| "logps/rejected": -1760.0, | |
| "loss": 1.23828125, | |
| "nll_loss": 1.951171875, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.67578125, | |
| "rewards/margins": 1.49609375, | |
| "rewards/rejected": -0.8203125, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.05997001499250375, | |
| "grad_norm": 81.5, | |
| "learning_rate": 9.996270517156431e-06, | |
| "logits/chosen": -1.70703125, | |
| "logits/rejected": -1.681640625, | |
| "logps/chosen": -1257.0, | |
| "logps/rejected": -1410.0, | |
| "loss": 1.08984375, | |
| "nll_loss": 1.80859375, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 0.947265625, | |
| "rewards/margins": 2.205078125, | |
| "rewards/rejected": -1.2578125, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.06296851574212893, | |
| "grad_norm": 68.5, | |
| "learning_rate": 9.994923987375029e-06, | |
| "logits/chosen": -1.673828125, | |
| "logits/rejected": -1.6796875, | |
| "logps/chosen": -1484.0, | |
| "logps/rejected": -1427.0, | |
| "loss": 1.017578125, | |
| "nll_loss": 2.109375, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 1.3046875, | |
| "rewards/margins": 3.8828125, | |
| "rewards/rejected": -2.578125, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.06596701649175413, | |
| "grad_norm": 84.0, | |
| "learning_rate": 9.993370449424153e-06, | |
| "logits/chosen": -1.69140625, | |
| "logits/rejected": -1.666015625, | |
| "logps/chosen": -1314.0, | |
| "logps/rejected": -1415.0, | |
| "loss": 1.201171875, | |
| "nll_loss": 1.6083984375, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 0.44921875, | |
| "rewards/margins": 1.31640625, | |
| "rewards/rejected": -0.8671875, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.06896551724137931, | |
| "grad_norm": 106.0, | |
| "learning_rate": 9.991609967688177e-06, | |
| "logits/chosen": -1.70703125, | |
| "logits/rejected": -1.712890625, | |
| "logps/chosen": -1092.625, | |
| "logps/rejected": -1225.0, | |
| "loss": 1.103515625, | |
| "nll_loss": 1.69384765625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.525390625, | |
| "rewards/margins": 2.474609375, | |
| "rewards/rejected": -0.94921875, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.07196401799100449, | |
| "grad_norm": 72.5, | |
| "learning_rate": 9.98964261512799e-06, | |
| "logits/chosen": -1.68359375, | |
| "logits/rejected": -1.666015625, | |
| "logps/chosen": -1200.0, | |
| "logps/rejected": -1340.0, | |
| "loss": 1.1376953125, | |
| "nll_loss": 1.677734375, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.91015625, | |
| "rewards/margins": 1.685546875, | |
| "rewards/rejected": -0.775390625, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.07496251874062969, | |
| "grad_norm": 87.0, | |
| "learning_rate": 9.987468473277975e-06, | |
| "logits/chosen": -1.697265625, | |
| "logits/rejected": -1.7109375, | |
| "logps/chosen": -2006.0, | |
| "logps/rejected": -2126.0, | |
| "loss": 1.259765625, | |
| "nll_loss": 2.427734375, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 2.328125, | |
| "rewards/margins": 2.7060546875, | |
| "rewards/rejected": -0.3779296875, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.07796101949025487, | |
| "grad_norm": 77.0, | |
| "learning_rate": 9.985087632242634e-06, | |
| "logits/chosen": -1.69140625, | |
| "logits/rejected": -1.68359375, | |
| "logps/chosen": -949.5, | |
| "logps/rejected": -1022.5, | |
| "loss": 1.2421875, | |
| "nll_loss": 1.724609375, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 0.53125, | |
| "rewards/margins": 1.041015625, | |
| "rewards/rejected": -0.509765625, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.08095952023988005, | |
| "grad_norm": 85.5, | |
| "learning_rate": 9.982500190692846e-06, | |
| "logits/chosen": -1.732421875, | |
| "logits/rejected": -1.7265625, | |
| "logps/chosen": -2014.0, | |
| "logps/rejected": -2064.0, | |
| "loss": 1.318359375, | |
| "nll_loss": 2.41796875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.53125, | |
| "rewards/margins": 2.265625, | |
| "rewards/rejected": 0.265625, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.08395802098950525, | |
| "grad_norm": 65.5, | |
| "learning_rate": 9.97970625586178e-06, | |
| "logits/chosen": -1.6796875, | |
| "logits/rejected": -1.64453125, | |
| "logps/chosen": -1634.5, | |
| "logps/rejected": -1626.5, | |
| "loss": 1.19140625, | |
| "nll_loss": 1.9072265625, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 1.0390625, | |
| "rewards/margins": 2.0390625, | |
| "rewards/rejected": -1.0, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.08695652173913043, | |
| "grad_norm": 99.5, | |
| "learning_rate": 9.976705943540458e-06, | |
| "logits/chosen": -1.5234375, | |
| "logits/rejected": -1.54296875, | |
| "logps/chosen": -861.5, | |
| "logps/rejected": -1116.0, | |
| "loss": 0.9560546875, | |
| "nll_loss": 1.390625, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 1.953125, | |
| "rewards/margins": 4.0, | |
| "rewards/rejected": -2.046875, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.08995502248875563, | |
| "grad_norm": 146.0, | |
| "learning_rate": 9.973499378072947e-06, | |
| "logits/chosen": -1.794921875, | |
| "logits/rejected": -1.72265625, | |
| "logps/chosen": -1960.0, | |
| "logps/rejected": -1952.0, | |
| "loss": 1.140625, | |
| "nll_loss": 2.068359375, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 0.359375, | |
| "rewards/margins": 2.90625, | |
| "rewards/rejected": -2.546875, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.09295352323838081, | |
| "grad_norm": 78.5, | |
| "learning_rate": 9.970086692351204e-06, | |
| "logits/chosen": -1.755859375, | |
| "logits/rejected": -1.7109375, | |
| "logps/chosen": -1414.0, | |
| "logps/rejected": -1594.0, | |
| "loss": 1.0595703125, | |
| "nll_loss": 1.90625, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 1.31640625, | |
| "rewards/margins": 3.203125, | |
| "rewards/rejected": -1.8828125, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.095952023988006, | |
| "grad_norm": 88.5, | |
| "learning_rate": 9.966468027809582e-06, | |
| "logits/chosen": -1.662109375, | |
| "logits/rejected": -1.658203125, | |
| "logps/chosen": -1564.0, | |
| "logps/rejected": -1528.0, | |
| "loss": 0.974609375, | |
| "nll_loss": 1.8515625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.078125, | |
| "rewards/margins": 3.421875, | |
| "rewards/rejected": -2.34375, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.09895052473763119, | |
| "grad_norm": 66.0, | |
| "learning_rate": 9.962643534418954e-06, | |
| "logits/chosen": -1.68359375, | |
| "logits/rejected": -1.560546875, | |
| "logps/chosen": -1242.25, | |
| "logps/rejected": -1332.125, | |
| "loss": 1.10546875, | |
| "nll_loss": 1.994140625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.53515625, | |
| "rewards/margins": 2.921875, | |
| "rewards/rejected": -0.384765625, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.10194902548725637, | |
| "grad_norm": 94.5, | |
| "learning_rate": 9.958613370680507e-06, | |
| "logits/chosen": -1.78125, | |
| "logits/rejected": -1.740234375, | |
| "logps/chosen": -1670.0, | |
| "logps/rejected": -1626.0, | |
| "loss": 1.232421875, | |
| "nll_loss": 2.09765625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.86328125, | |
| "rewards/margins": 1.953125, | |
| "rewards/rejected": -1.08984375, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.10494752623688156, | |
| "grad_norm": 78.0, | |
| "learning_rate": 9.954377703619171e-06, | |
| "logits/chosen": -1.6953125, | |
| "logits/rejected": -1.689453125, | |
| "logps/chosen": -1042.0, | |
| "logps/rejected": -1060.5, | |
| "loss": 1.1181640625, | |
| "nll_loss": 1.666015625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.6796875, | |
| "rewards/margins": 2.91015625, | |
| "rewards/rejected": 0.76953125, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.10794602698650675, | |
| "grad_norm": 94.0, | |
| "learning_rate": 9.949936708776692e-06, | |
| "logits/chosen": -1.67578125, | |
| "logits/rejected": -1.65625, | |
| "logps/chosen": -952.5, | |
| "logps/rejected": -1075.0, | |
| "loss": 1.0927734375, | |
| "nll_loss": 1.6328125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.5859375, | |
| "rewards/margins": 5.521484375, | |
| "rewards/rejected": -2.943359375, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.11094452773613193, | |
| "grad_norm": 72.0, | |
| "learning_rate": 9.945290570204361e-06, | |
| "logits/chosen": -1.736328125, | |
| "logits/rejected": -1.720703125, | |
| "logps/chosen": -792.0, | |
| "logps/rejected": -972.0, | |
| "loss": 1.0947265625, | |
| "nll_loss": 1.509765625, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 2.33203125, | |
| "rewards/margins": 2.1484375, | |
| "rewards/rejected": 0.18359375, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.11394302848575712, | |
| "grad_norm": 92.5, | |
| "learning_rate": 9.940439480455386e-06, | |
| "logits/chosen": -1.69140625, | |
| "logits/rejected": -1.677734375, | |
| "logps/chosen": -1531.0, | |
| "logps/rejected": -1532.0, | |
| "loss": 0.98828125, | |
| "nll_loss": 1.890625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.40234375, | |
| "rewards/margins": 3.4296875, | |
| "rewards/rejected": -1.02734375, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.11694152923538231, | |
| "grad_norm": 66.0, | |
| "learning_rate": 9.935383640576915e-06, | |
| "logits/chosen": -1.732421875, | |
| "logits/rejected": -1.671875, | |
| "logps/chosen": -1572.0, | |
| "logps/rejected": -1528.0, | |
| "loss": 1.048828125, | |
| "nll_loss": 1.96484375, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 1.85546875, | |
| "rewards/margins": 3.95703125, | |
| "rewards/rejected": -2.1015625, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.1199400299850075, | |
| "grad_norm": 80.5, | |
| "learning_rate": 9.930123260101697e-06, | |
| "logits/chosen": -1.861328125, | |
| "logits/rejected": -1.802734375, | |
| "logps/chosen": -1168.0, | |
| "logps/rejected": -1392.0, | |
| "loss": 1.087890625, | |
| "nll_loss": 1.6796875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.8984375, | |
| "rewards/margins": 2.65625, | |
| "rewards/rejected": -1.7578125, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.12293853073463268, | |
| "grad_norm": 77.5, | |
| "learning_rate": 9.9246585570394e-06, | |
| "logits/chosen": -1.73828125, | |
| "logits/rejected": -1.552734375, | |
| "logps/chosen": -1316.25, | |
| "logps/rejected": -1588.0, | |
| "loss": 0.89453125, | |
| "nll_loss": 1.591796875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.75, | |
| "rewards/margins": 3.625, | |
| "rewards/rejected": -1.875, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.12593703148425786, | |
| "grad_norm": 143.0, | |
| "learning_rate": 9.918989757867584e-06, | |
| "logits/chosen": -1.7890625, | |
| "logits/rejected": -1.7578125, | |
| "logps/chosen": -2000.0, | |
| "logps/rejected": -1968.0, | |
| "loss": 1.24609375, | |
| "nll_loss": 2.30078125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.078125, | |
| "rewards/margins": 3.109375, | |
| "rewards/rejected": -3.1875, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.12893553223388307, | |
| "grad_norm": 61.25, | |
| "learning_rate": 9.9131170975223e-06, | |
| "logits/chosen": -1.7265625, | |
| "logits/rejected": -1.720703125, | |
| "logps/chosen": -1172.0, | |
| "logps/rejected": -1382.0, | |
| "loss": 1.087890625, | |
| "nll_loss": 1.943359375, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 1.83984375, | |
| "rewards/margins": 3.05859375, | |
| "rewards/rejected": -1.21875, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.13193403298350825, | |
| "grad_norm": 71.0, | |
| "learning_rate": 9.907040819388372e-06, | |
| "logits/chosen": -1.794921875, | |
| "logits/rejected": -1.685546875, | |
| "logps/chosen": -946.0, | |
| "logps/rejected": -863.25, | |
| "loss": 0.943359375, | |
| "nll_loss": 1.4990234375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.22265625, | |
| "rewards/margins": 2.45703125, | |
| "rewards/rejected": -1.2353515625, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.13493253373313344, | |
| "grad_norm": 62.25, | |
| "learning_rate": 9.90076117528929e-06, | |
| "logits/chosen": -1.64453125, | |
| "logits/rejected": -1.603515625, | |
| "logps/chosen": -743.25, | |
| "logps/rejected": -811.625, | |
| "loss": 0.95703125, | |
| "nll_loss": 1.330078125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.2421875, | |
| "rewards/margins": 2.087890625, | |
| "rewards/rejected": -0.84375, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.13793103448275862, | |
| "grad_norm": 62.0, | |
| "learning_rate": 9.89427842547679e-06, | |
| "logits/chosen": -1.59375, | |
| "logits/rejected": -1.65234375, | |
| "logps/chosen": -1245.5, | |
| "logps/rejected": -1451.0, | |
| "loss": 1.017578125, | |
| "nll_loss": 1.7666015625, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 1.2578125, | |
| "rewards/margins": 3.19921875, | |
| "rewards/rejected": -1.94140625, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.1409295352323838, | |
| "grad_norm": 92.0, | |
| "learning_rate": 9.88759283862006e-06, | |
| "logits/chosen": -1.80078125, | |
| "logits/rejected": -1.7578125, | |
| "logps/chosen": -1332.0, | |
| "logps/rejected": -1294.0, | |
| "loss": 1.0537109375, | |
| "nll_loss": 2.0556640625, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 1.17578125, | |
| "rewards/margins": 3.3359375, | |
| "rewards/rejected": -2.16015625, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.14392803598200898, | |
| "grad_norm": 70.0, | |
| "learning_rate": 9.880704691794608e-06, | |
| "logits/chosen": -1.5703125, | |
| "logits/rejected": -1.541015625, | |
| "logps/chosen": -1231.25, | |
| "logps/rejected": -1248.5, | |
| "loss": 1.08203125, | |
| "nll_loss": 1.873046875, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 0.82421875, | |
| "rewards/margins": 2.75390625, | |
| "rewards/rejected": -1.9296875, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.1469265367316342, | |
| "grad_norm": 88.5, | |
| "learning_rate": 9.873614270470778e-06, | |
| "logits/chosen": -1.755859375, | |
| "logits/rejected": -1.75, | |
| "logps/chosen": -2344.0, | |
| "logps/rejected": -2312.0, | |
| "loss": 1.1123046875, | |
| "nll_loss": 2.46484375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.375, | |
| "rewards/margins": 4.375, | |
| "rewards/rejected": -2.0, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.14992503748125938, | |
| "grad_norm": 45.75, | |
| "learning_rate": 9.866321868501914e-06, | |
| "logits/chosen": -1.689453125, | |
| "logits/rejected": -1.673828125, | |
| "logps/chosen": -1115.75, | |
| "logps/rejected": -1157.375, | |
| "loss": 0.8994140625, | |
| "nll_loss": 1.236328125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.888671875, | |
| "rewards/margins": 2.609375, | |
| "rewards/rejected": -0.7197265625, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.15292353823088456, | |
| "grad_norm": 64.5, | |
| "learning_rate": 9.858827788112195e-06, | |
| "logits/chosen": -1.685546875, | |
| "logits/rejected": -1.646484375, | |
| "logps/chosen": -1329.0, | |
| "logps/rejected": -1496.0, | |
| "loss": 1.095703125, | |
| "nll_loss": 1.66015625, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 1.96875, | |
| "rewards/margins": 6.296875, | |
| "rewards/rejected": -4.328125, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.15592203898050974, | |
| "grad_norm": 55.25, | |
| "learning_rate": 9.851132339884097e-06, | |
| "logits/chosen": -1.642578125, | |
| "logits/rejected": -1.59765625, | |
| "logps/chosen": -847.21875, | |
| "logps/rejected": -881.5, | |
| "loss": 0.8349609375, | |
| "nll_loss": 1.100830078125, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 2.23828125, | |
| "rewards/margins": 3.14453125, | |
| "rewards/rejected": -0.90234375, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.15892053973013492, | |
| "grad_norm": 57.5, | |
| "learning_rate": 9.843235842745527e-06, | |
| "logits/chosen": -1.662109375, | |
| "logits/rejected": -1.6640625, | |
| "logps/chosen": -1027.3125, | |
| "logps/rejected": -1074.5625, | |
| "loss": 0.8544921875, | |
| "nll_loss": 1.43701171875, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 2.14453125, | |
| "rewards/margins": 3.2890625, | |
| "rewards/rejected": -1.146484375, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.1619190404797601, | |
| "grad_norm": 53.0, | |
| "learning_rate": 9.835138623956603e-06, | |
| "logits/chosen": -1.685546875, | |
| "logits/rejected": -1.62890625, | |
| "logps/chosen": -1329.0, | |
| "logps/rejected": -1319.5, | |
| "loss": 0.94921875, | |
| "nll_loss": 1.47802734375, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 2.25, | |
| "rewards/margins": 2.953125, | |
| "rewards/rejected": -0.703125, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.16491754122938532, | |
| "grad_norm": 68.0, | |
| "learning_rate": 9.826841019096095e-06, | |
| "logits/chosen": -1.818359375, | |
| "logits/rejected": -1.7421875, | |
| "logps/chosen": -1718.0, | |
| "logps/rejected": -1801.0, | |
| "loss": 0.8681640625, | |
| "nll_loss": 2.0546875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.57421875, | |
| "rewards/margins": 4.7734375, | |
| "rewards/rejected": -2.203125, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.1679160419790105, | |
| "grad_norm": 73.5, | |
| "learning_rate": 9.818343372047509e-06, | |
| "logits/chosen": -1.591796875, | |
| "logits/rejected": -1.6640625, | |
| "logps/chosen": -1427.5, | |
| "logps/rejected": -1725.0, | |
| "loss": 0.8447265625, | |
| "nll_loss": 1.916015625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.6171875, | |
| "rewards/margins": 4.44921875, | |
| "rewards/rejected": -1.828125, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.17091454272863568, | |
| "grad_norm": 57.75, | |
| "learning_rate": 9.80964603498485e-06, | |
| "logits/chosen": -1.787109375, | |
| "logits/rejected": -1.751953125, | |
| "logps/chosen": -1070.25, | |
| "logps/rejected": -1346.0, | |
| "loss": 0.9833984375, | |
| "nll_loss": 1.65771484375, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 0.73046875, | |
| "rewards/margins": 3.03515625, | |
| "rewards/rejected": -2.30859375, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.17391304347826086, | |
| "grad_norm": 86.5, | |
| "learning_rate": 9.80074936835801e-06, | |
| "logits/chosen": -1.74609375, | |
| "logits/rejected": -1.7421875, | |
| "logps/chosen": -1275.0, | |
| "logps/rejected": -1245.0, | |
| "loss": 0.9365234375, | |
| "nll_loss": 1.6171875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.09375, | |
| "rewards/margins": 3.78125, | |
| "rewards/rejected": -2.6875, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.17691154422788605, | |
| "grad_norm": 54.25, | |
| "learning_rate": 9.79165374087784e-06, | |
| "logits/chosen": -1.66015625, | |
| "logits/rejected": -1.6328125, | |
| "logps/chosen": -723.875, | |
| "logps/rejected": -758.0, | |
| "loss": 0.8095703125, | |
| "nll_loss": 1.4111328125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.2109375, | |
| "rewards/margins": 3.40625, | |
| "rewards/rejected": -2.19921875, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.17991004497751126, | |
| "grad_norm": 91.0, | |
| "learning_rate": 9.782359529500867e-06, | |
| "logits/chosen": -1.650390625, | |
| "logits/rejected": -1.658203125, | |
| "logps/chosen": -1601.5, | |
| "logps/rejected": -1704.0, | |
| "loss": 0.78515625, | |
| "nll_loss": 1.71728515625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.41015625, | |
| "rewards/margins": 6.46875, | |
| "rewards/rejected": -5.0625, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.18290854572713644, | |
| "grad_norm": 95.0, | |
| "learning_rate": 9.772867119413667e-06, | |
| "logits/chosen": -1.57421875, | |
| "logits/rejected": -1.724609375, | |
| "logps/chosen": -1421.0, | |
| "logps/rejected": -1319.0, | |
| "loss": 0.9287109375, | |
| "nll_loss": 2.1708984375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.3203125, | |
| "rewards/margins": 5.75390625, | |
| "rewards/rejected": -4.4375, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.18590704647676162, | |
| "grad_norm": 46.0, | |
| "learning_rate": 9.763176904016914e-06, | |
| "logits/chosen": -1.669921875, | |
| "logits/rejected": -1.634765625, | |
| "logps/chosen": -1163.0, | |
| "logps/rejected": -1400.0, | |
| "loss": 0.7841796875, | |
| "nll_loss": 1.623046875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.6875, | |
| "rewards/margins": 5.9296875, | |
| "rewards/rejected": -3.2421875, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.1889055472263868, | |
| "grad_norm": 64.5, | |
| "learning_rate": 9.753289284909058e-06, | |
| "logits/chosen": -1.802734375, | |
| "logits/rejected": -1.712890625, | |
| "logps/chosen": -1660.0, | |
| "logps/rejected": -1758.0, | |
| "loss": 0.9921875, | |
| "nll_loss": 2.16015625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.4375, | |
| "rewards/margins": 4.7265625, | |
| "rewards/rejected": -2.2890625, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.191904047976012, | |
| "grad_norm": 36.25, | |
| "learning_rate": 9.743204671869694e-06, | |
| "logits/chosen": -1.728515625, | |
| "logits/rejected": -1.587890625, | |
| "logps/chosen": -1348.0, | |
| "logps/rejected": -1258.0, | |
| "loss": 0.8037109375, | |
| "nll_loss": 1.7705078125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.24609375, | |
| "rewards/margins": 5.19140625, | |
| "rewards/rejected": -2.9453125, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.19490254872563717, | |
| "grad_norm": 61.75, | |
| "learning_rate": 9.73292348284258e-06, | |
| "logits/chosen": -1.61328125, | |
| "logits/rejected": -1.63671875, | |
| "logps/chosen": -1367.875, | |
| "logps/rejected": -1122.0, | |
| "loss": 0.927734375, | |
| "nll_loss": 1.49462890625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.412109375, | |
| "rewards/margins": 3.296875, | |
| "rewards/rejected": -0.8828125, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.19790104947526238, | |
| "grad_norm": 79.0, | |
| "learning_rate": 9.722446143918307e-06, | |
| "logits/chosen": -1.6171875, | |
| "logits/rejected": -1.58984375, | |
| "logps/chosen": -1438.0, | |
| "logps/rejected": -1646.0, | |
| "loss": 0.767578125, | |
| "nll_loss": 1.986328125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.34765625, | |
| "rewards/margins": 5.8046875, | |
| "rewards/rejected": -3.4609375, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.20089955022488756, | |
| "grad_norm": 120.5, | |
| "learning_rate": 9.711773089316645e-06, | |
| "logits/chosen": -1.6328125, | |
| "logits/rejected": -1.72265625, | |
| "logps/chosen": -1596.0, | |
| "logps/rejected": -1788.0, | |
| "loss": 0.822265625, | |
| "nll_loss": 2.0546875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.5, | |
| "rewards/margins": 6.453125, | |
| "rewards/rejected": -1.953125, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.20389805097451275, | |
| "grad_norm": 61.5, | |
| "learning_rate": 9.70090476136855e-06, | |
| "logits/chosen": -1.685546875, | |
| "logits/rejected": -1.685546875, | |
| "logps/chosen": -1428.0, | |
| "logps/rejected": -1726.0, | |
| "loss": 0.7880859375, | |
| "nll_loss": 2.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.78125, | |
| "rewards/margins": 6.40625, | |
| "rewards/rejected": -2.625, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.20689655172413793, | |
| "grad_norm": 77.0, | |
| "learning_rate": 9.689841610497828e-06, | |
| "logits/chosen": -1.62109375, | |
| "logits/rejected": -1.693359375, | |
| "logps/chosen": -1035.0, | |
| "logps/rejected": -1347.0, | |
| "loss": 0.8095703125, | |
| "nll_loss": 1.748046875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.4140625, | |
| "rewards/margins": 5.28125, | |
| "rewards/rejected": -1.8671875, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.2098950524737631, | |
| "grad_norm": 41.0, | |
| "learning_rate": 9.678584095202468e-06, | |
| "logits/chosen": -1.78515625, | |
| "logits/rejected": -1.697265625, | |
| "logps/chosen": -1078.25, | |
| "logps/rejected": -1206.5, | |
| "loss": 0.8173828125, | |
| "nll_loss": 1.513671875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.53125, | |
| "rewards/margins": 5.34375, | |
| "rewards/rejected": -2.8125, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.2128935532233883, | |
| "grad_norm": 63.25, | |
| "learning_rate": 9.667132682035646e-06, | |
| "logits/chosen": -1.732421875, | |
| "logits/rejected": -1.712890625, | |
| "logps/chosen": -1301.5625, | |
| "logps/rejected": -1591.125, | |
| "loss": 0.5625, | |
| "nll_loss": 1.4111328125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.837890625, | |
| "rewards/margins": 7.5, | |
| "rewards/rejected": -4.6640625, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.2158920539730135, | |
| "grad_norm": 73.5, | |
| "learning_rate": 9.655487845586378e-06, | |
| "logits/chosen": -1.71484375, | |
| "logits/rejected": -1.70703125, | |
| "logps/chosen": -1466.0, | |
| "logps/rejected": -1548.75, | |
| "loss": 0.751953125, | |
| "nll_loss": 1.802734375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.09765625, | |
| "rewards/margins": 5.875, | |
| "rewards/rejected": -2.78125, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.21889055472263869, | |
| "grad_norm": 62.75, | |
| "learning_rate": 9.643650068459863e-06, | |
| "logits/chosen": -1.765625, | |
| "logits/rejected": -1.740234375, | |
| "logps/chosen": -1574.0, | |
| "logps/rejected": -1474.0, | |
| "loss": 0.9892578125, | |
| "nll_loss": 2.103515625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.703125, | |
| "rewards/margins": 3.703125, | |
| "rewards/rejected": -2.0, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.22188905547226387, | |
| "grad_norm": 64.5, | |
| "learning_rate": 9.631619841257477e-06, | |
| "logits/chosen": -1.755859375, | |
| "logits/rejected": -1.8046875, | |
| "logps/chosen": -1734.0, | |
| "logps/rejected": -1826.0, | |
| "loss": 0.7763671875, | |
| "nll_loss": 2.20703125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.5390625, | |
| "rewards/margins": 7.9453125, | |
| "rewards/rejected": -4.40625, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.22488755622188905, | |
| "grad_norm": 48.0, | |
| "learning_rate": 9.619397662556434e-06, | |
| "logits/chosen": -1.78515625, | |
| "logits/rejected": -1.646484375, | |
| "logps/chosen": -757.34375, | |
| "logps/rejected": -755.5, | |
| "loss": 0.8515625, | |
| "nll_loss": 1.28759765625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.5859375, | |
| "rewards/margins": 3.9296875, | |
| "rewards/rejected": -2.34375, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.22788605697151423, | |
| "grad_norm": 68.5, | |
| "learning_rate": 9.606984038889142e-06, | |
| "logits/chosen": -1.654296875, | |
| "logits/rejected": -1.615234375, | |
| "logps/chosen": -1081.0, | |
| "logps/rejected": -1185.0, | |
| "loss": 0.9453125, | |
| "nll_loss": 1.45703125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.015625, | |
| "rewards/margins": 3.859375, | |
| "rewards/rejected": -2.84375, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.23088455772113944, | |
| "grad_norm": 33.25, | |
| "learning_rate": 9.594379484722185e-06, | |
| "logits/chosen": -1.607421875, | |
| "logits/rejected": -1.609375, | |
| "logps/chosen": -888.75, | |
| "logps/rejected": -1034.0, | |
| "loss": 0.587890625, | |
| "nll_loss": 1.18896484375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.0546875, | |
| "rewards/margins": 8.9609375, | |
| "rewards/rejected": -6.890625, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.23388305847076463, | |
| "grad_norm": 61.0, | |
| "learning_rate": 9.581584522435025e-06, | |
| "logits/chosen": -1.751953125, | |
| "logits/rejected": -1.740234375, | |
| "logps/chosen": -2404.0, | |
| "logps/rejected": -2376.0, | |
| "loss": 0.8505859375, | |
| "nll_loss": 2.4453125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.5, | |
| "rewards/margins": 7.25, | |
| "rewards/rejected": -3.75, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.2368815592203898, | |
| "grad_norm": 82.0, | |
| "learning_rate": 9.568599682298337e-06, | |
| "logits/chosen": -1.57421875, | |
| "logits/rejected": -1.564453125, | |
| "logps/chosen": -1025.5, | |
| "logps/rejected": -1050.0, | |
| "loss": 0.935546875, | |
| "nll_loss": 1.896484375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.7265625, | |
| "rewards/margins": 4.375, | |
| "rewards/rejected": -1.64453125, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.239880059970015, | |
| "grad_norm": 47.25, | |
| "learning_rate": 9.555425502452038e-06, | |
| "logits/chosen": -1.744140625, | |
| "logits/rejected": -1.724609375, | |
| "logps/chosen": -1289.0, | |
| "logps/rejected": -1327.0, | |
| "loss": 0.845703125, | |
| "nll_loss": 1.751953125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.05078125, | |
| "rewards/margins": 5.5, | |
| "rewards/rejected": -3.453125, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.24287856071964017, | |
| "grad_norm": 57.0, | |
| "learning_rate": 9.542062528882989e-06, | |
| "logits/chosen": -1.619140625, | |
| "logits/rejected": -1.607421875, | |
| "logps/chosen": -795.0625, | |
| "logps/rejected": -899.0, | |
| "loss": 0.7236328125, | |
| "nll_loss": 1.14697265625, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 1.2978515625, | |
| "rewards/margins": 4.11328125, | |
| "rewards/rejected": -2.8125, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.24587706146926536, | |
| "grad_norm": 63.5, | |
| "learning_rate": 9.528511315402358e-06, | |
| "logits/chosen": -1.732421875, | |
| "logits/rejected": -1.791015625, | |
| "logps/chosen": -1182.25, | |
| "logps/rejected": -1469.75, | |
| "loss": 0.685546875, | |
| "nll_loss": 1.826171875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.671875, | |
| "rewards/margins": 7.0078125, | |
| "rewards/rejected": -3.33203125, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.24887556221889057, | |
| "grad_norm": 71.5, | |
| "learning_rate": 9.514772423622675e-06, | |
| "logits/chosen": -1.73828125, | |
| "logits/rejected": -1.740234375, | |
| "logps/chosen": -1593.0, | |
| "logps/rejected": -1725.0, | |
| "loss": 0.8203125, | |
| "nll_loss": 2.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.953125, | |
| "rewards/margins": 7.2421875, | |
| "rewards/rejected": -3.2890625, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.2518740629685157, | |
| "grad_norm": 44.5, | |
| "learning_rate": 9.500846422934557e-06, | |
| "logits/chosen": -1.7265625, | |
| "logits/rejected": -1.6953125, | |
| "logps/chosen": -1706.0, | |
| "logps/rejected": -1723.0, | |
| "loss": 0.724609375, | |
| "nll_loss": 2.033203125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.583984375, | |
| "rewards/margins": 6.1171875, | |
| "rewards/rejected": -3.53515625, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.25487256371814093, | |
| "grad_norm": 55.0, | |
| "learning_rate": 9.4867338904831e-06, | |
| "logits/chosen": -1.66796875, | |
| "logits/rejected": -1.630859375, | |
| "logps/chosen": -954.0, | |
| "logps/rejected": -1090.0, | |
| "loss": 0.7001953125, | |
| "nll_loss": 1.50390625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.728515625, | |
| "rewards/margins": 5.0625, | |
| "rewards/rejected": -3.33203125, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.25787106446776614, | |
| "grad_norm": 94.0, | |
| "learning_rate": 9.472435411143979e-06, | |
| "logits/chosen": -1.771484375, | |
| "logits/rejected": -1.798828125, | |
| "logps/chosen": -1596.0, | |
| "logps/rejected": -1594.0, | |
| "loss": 0.89453125, | |
| "nll_loss": 2.115234375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.2578125, | |
| "rewards/margins": 5.765625, | |
| "rewards/rejected": -4.5078125, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.2608695652173913, | |
| "grad_norm": 46.75, | |
| "learning_rate": 9.457951577499187e-06, | |
| "logits/chosen": -1.783203125, | |
| "logits/rejected": -1.744140625, | |
| "logps/chosen": -1765.0, | |
| "logps/rejected": -2003.0, | |
| "loss": 0.6748046875, | |
| "nll_loss": 2.07421875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.234375, | |
| "rewards/margins": 7.90625, | |
| "rewards/rejected": -4.671875, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.2638680659670165, | |
| "grad_norm": 40.5, | |
| "learning_rate": 9.443282989812494e-06, | |
| "logits/chosen": -1.748046875, | |
| "logits/rejected": -1.7578125, | |
| "logps/chosen": -1503.4375, | |
| "logps/rejected": -1483.0, | |
| "loss": 0.67919921875, | |
| "nll_loss": 2.0185546875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.05078125, | |
| "rewards/margins": 7.390625, | |
| "rewards/rejected": -4.33984375, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.26686656671664166, | |
| "grad_norm": 105.5, | |
| "learning_rate": 9.428430256004558e-06, | |
| "logits/chosen": -1.63671875, | |
| "logits/rejected": -1.703125, | |
| "logps/chosen": -1257.0, | |
| "logps/rejected": -1324.0, | |
| "loss": 0.84716796875, | |
| "nll_loss": 1.6064453125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.99609375, | |
| "rewards/margins": 8.45703125, | |
| "rewards/rejected": -4.4453125, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.2698650674662669, | |
| "grad_norm": 60.0, | |
| "learning_rate": 9.413393991627737e-06, | |
| "logits/chosen": -1.552734375, | |
| "logits/rejected": -1.513671875, | |
| "logps/chosen": -624.09375, | |
| "logps/rejected": -702.125, | |
| "loss": 0.67236328125, | |
| "nll_loss": 1.203125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.826171875, | |
| "rewards/margins": 4.8828125, | |
| "rewards/rejected": -2.0625, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.272863568215892, | |
| "grad_norm": 83.5, | |
| "learning_rate": 9.398174819840577e-06, | |
| "logits/chosen": -1.63671875, | |
| "logits/rejected": -1.650390625, | |
| "logps/chosen": -1771.0, | |
| "logps/rejected": -1917.0, | |
| "loss": 0.732421875, | |
| "nll_loss": 2.076171875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.205078125, | |
| "rewards/margins": 7.234375, | |
| "rewards/rejected": -3.03125, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.27586206896551724, | |
| "grad_norm": 92.0, | |
| "learning_rate": 9.382773371381986e-06, | |
| "logits/chosen": -1.759765625, | |
| "logits/rejected": -1.77734375, | |
| "logps/chosen": -1620.0, | |
| "logps/rejected": -1728.0, | |
| "loss": 0.8486328125, | |
| "nll_loss": 2.0859375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.640625, | |
| "rewards/margins": 6.5625, | |
| "rewards/rejected": -1.921875, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.27886056971514245, | |
| "grad_norm": 60.5, | |
| "learning_rate": 9.367190284545087e-06, | |
| "logits/chosen": -1.6484375, | |
| "logits/rejected": -1.65625, | |
| "logps/chosen": -1077.5, | |
| "logps/rejected": -1267.0, | |
| "loss": 0.634765625, | |
| "nll_loss": 1.662109375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.134765625, | |
| "rewards/margins": 7.2734375, | |
| "rewards/rejected": -3.140625, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.2818590704647676, | |
| "grad_norm": 70.5, | |
| "learning_rate": 9.351426205150778e-06, | |
| "logits/chosen": -1.708984375, | |
| "logits/rejected": -1.744140625, | |
| "logps/chosen": -1548.0, | |
| "logps/rejected": -1706.0, | |
| "loss": 0.755859375, | |
| "nll_loss": 1.7734375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.359375, | |
| "rewards/margins": 5.890625, | |
| "rewards/rejected": -3.53125, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.2848575712143928, | |
| "grad_norm": 32.25, | |
| "learning_rate": 9.335481786520955e-06, | |
| "logits/chosen": -1.66796875, | |
| "logits/rejected": -1.595703125, | |
| "logps/chosen": -977.625, | |
| "logps/rejected": -1124.0, | |
| "loss": 0.6640625, | |
| "nll_loss": 1.5087890625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.158203125, | |
| "rewards/margins": 5.28515625, | |
| "rewards/rejected": -3.12890625, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.28785607196401797, | |
| "grad_norm": 45.25, | |
| "learning_rate": 9.319357689451444e-06, | |
| "logits/chosen": -1.845703125, | |
| "logits/rejected": -1.802734375, | |
| "logps/chosen": -1418.0, | |
| "logps/rejected": -1503.0, | |
| "loss": 0.7265625, | |
| "nll_loss": 1.974609375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.48046875, | |
| "rewards/margins": 7.765625, | |
| "rewards/rejected": -4.28125, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.2908545727136432, | |
| "grad_norm": 24.625, | |
| "learning_rate": 9.30305458218461e-06, | |
| "logits/chosen": -1.650390625, | |
| "logits/rejected": -1.640625, | |
| "logps/chosen": -1281.125, | |
| "logps/rejected": -1367.25, | |
| "loss": 0.52880859375, | |
| "nll_loss": 1.446044921875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.453125, | |
| "rewards/margins": 11.46875, | |
| "rewards/rejected": -8.03125, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.2938530734632684, | |
| "grad_norm": 57.25, | |
| "learning_rate": 9.286573140381663e-06, | |
| "logits/chosen": -1.65234375, | |
| "logits/rejected": -1.6875, | |
| "logps/chosen": -1049.5, | |
| "logps/rejected": -1349.5, | |
| "loss": 0.79296875, | |
| "nll_loss": 1.53515625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.234375, | |
| "rewards/margins": 4.796875, | |
| "rewards/rejected": -3.5625, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.29685157421289354, | |
| "grad_norm": 55.25, | |
| "learning_rate": 9.26991404709466e-06, | |
| "logits/chosen": -1.732421875, | |
| "logits/rejected": -1.708984375, | |
| "logps/chosen": -1141.0, | |
| "logps/rejected": -1280.0, | |
| "loss": 0.771484375, | |
| "nll_loss": 1.77734375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.1640625, | |
| "rewards/margins": 8.7421875, | |
| "rewards/rejected": -6.59375, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.29985007496251875, | |
| "grad_norm": 49.25, | |
| "learning_rate": 9.253077992738193e-06, | |
| "logits/chosen": -1.771484375, | |
| "logits/rejected": -1.77734375, | |
| "logps/chosen": -1269.0, | |
| "logps/rejected": -1431.0, | |
| "loss": 0.66796875, | |
| "nll_loss": 1.712890625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.61328125, | |
| "rewards/margins": 6.71875, | |
| "rewards/rejected": -4.1015625, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.3028485757121439, | |
| "grad_norm": 32.75, | |
| "learning_rate": 9.236065675060775e-06, | |
| "logits/chosen": -1.794921875, | |
| "logits/rejected": -1.740234375, | |
| "logps/chosen": -1110.5, | |
| "logps/rejected": -1320.0, | |
| "loss": 0.673828125, | |
| "nll_loss": 1.9921875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.16796875, | |
| "rewards/margins": 10.1953125, | |
| "rewards/rejected": -7.02734375, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.3058470764617691, | |
| "grad_norm": 90.0, | |
| "learning_rate": 9.218877799115929e-06, | |
| "logits/chosen": -1.701171875, | |
| "logits/rejected": -1.69140625, | |
| "logps/chosen": -1415.0, | |
| "logps/rejected": -1542.625, | |
| "loss": 0.791015625, | |
| "nll_loss": 1.62841796875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.837890625, | |
| "rewards/margins": 7.8046875, | |
| "rewards/rejected": -5.96875, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.30884557721139433, | |
| "grad_norm": 62.5, | |
| "learning_rate": 9.201515077232958e-06, | |
| "logits/chosen": -1.67578125, | |
| "logits/rejected": -1.67578125, | |
| "logps/chosen": -1520.0, | |
| "logps/rejected": -1620.0, | |
| "loss": 0.5986328125, | |
| "nll_loss": 1.732421875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.8125, | |
| "rewards/margins": 9.28125, | |
| "rewards/rejected": -6.46875, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.3118440779610195, | |
| "grad_norm": 43.0, | |
| "learning_rate": 9.183978228987436e-06, | |
| "logits/chosen": -1.650390625, | |
| "logits/rejected": -1.642578125, | |
| "logps/chosen": -1003.0, | |
| "logps/rejected": -1086.0, | |
| "loss": 0.646484375, | |
| "nll_loss": 1.595703125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.23046875, | |
| "rewards/margins": 9.859375, | |
| "rewards/rejected": -7.6171875, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.3148425787106447, | |
| "grad_norm": 15.5, | |
| "learning_rate": 9.166267981171369e-06, | |
| "logits/chosen": -1.623046875, | |
| "logits/rejected": -1.62890625, | |
| "logps/chosen": -297.4375, | |
| "logps/rejected": -299.25, | |
| "loss": 0.4462890625, | |
| "nll_loss": 0.8310546875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.98046875, | |
| "rewards/margins": 5.640625, | |
| "rewards/rejected": -3.66015625, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.31784107946026985, | |
| "grad_norm": 63.75, | |
| "learning_rate": 9.148385067763094e-06, | |
| "logits/chosen": -1.693359375, | |
| "logits/rejected": -1.708984375, | |
| "logps/chosen": -1387.25, | |
| "logps/rejected": -1392.5, | |
| "loss": 0.6611328125, | |
| "nll_loss": 1.66943359375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.7109375, | |
| "rewards/margins": 6.71875, | |
| "rewards/rejected": -3.0078125, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.32083958020989506, | |
| "grad_norm": 82.5, | |
| "learning_rate": 9.130330229896846e-06, | |
| "logits/chosen": -1.8203125, | |
| "logits/rejected": -1.8125, | |
| "logps/chosen": -1944.0, | |
| "logps/rejected": -2220.0, | |
| "loss": 0.7900390625, | |
| "nll_loss": 2.06640625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.390625, | |
| "rewards/margins": 7.890625, | |
| "rewards/rejected": -5.5, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.3238380809595202, | |
| "grad_norm": 36.5, | |
| "learning_rate": 9.112104215832047e-06, | |
| "logits/chosen": -1.5546875, | |
| "logits/rejected": -1.59375, | |
| "logps/chosen": -1111.25, | |
| "logps/rejected": -1300.5, | |
| "loss": 0.7431640625, | |
| "nll_loss": 1.3056640625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.8125, | |
| "rewards/margins": 5.55859375, | |
| "rewards/rejected": -2.74609375, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.3268365817091454, | |
| "grad_norm": 46.5, | |
| "learning_rate": 9.093707780922293e-06, | |
| "logits/chosen": -1.66015625, | |
| "logits/rejected": -1.662109375, | |
| "logps/chosen": -1114.0, | |
| "logps/rejected": -1274.0, | |
| "loss": 0.630859375, | |
| "nll_loss": 1.5625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.8515625, | |
| "rewards/margins": 10.2421875, | |
| "rewards/rejected": -6.375, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.32983508245877063, | |
| "grad_norm": 38.75, | |
| "learning_rate": 9.075141687584056e-06, | |
| "logits/chosen": -1.58984375, | |
| "logits/rejected": -1.583984375, | |
| "logps/chosen": -279.25, | |
| "logps/rejected": -589.0, | |
| "loss": 0.583984375, | |
| "nll_loss": 0.9462890625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.0625, | |
| "rewards/margins": 8.515625, | |
| "rewards/rejected": -6.46875, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.3328335832083958, | |
| "grad_norm": 38.5, | |
| "learning_rate": 9.056406705265084e-06, | |
| "logits/chosen": -1.609375, | |
| "logits/rejected": -1.638671875, | |
| "logps/chosen": -1255.0, | |
| "logps/rejected": -1392.5, | |
| "loss": 0.685546875, | |
| "nll_loss": 1.7734375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.90625, | |
| "rewards/margins": 6.09375, | |
| "rewards/rejected": -3.1875, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.335832083958021, | |
| "grad_norm": 88.5, | |
| "learning_rate": 9.037503610412502e-06, | |
| "logits/chosen": -1.69921875, | |
| "logits/rejected": -1.7265625, | |
| "logps/chosen": -1242.5, | |
| "logps/rejected": -1498.75, | |
| "loss": 0.86669921875, | |
| "nll_loss": 1.60302734375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.7421875, | |
| "rewards/margins": 4.4609375, | |
| "rewards/rejected": -1.71875, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.33883058470764615, | |
| "grad_norm": 50.25, | |
| "learning_rate": 9.018433186440648e-06, | |
| "logits/chosen": -1.755859375, | |
| "logits/rejected": -1.7265625, | |
| "logps/chosen": -1106.0, | |
| "logps/rejected": -1189.0, | |
| "loss": 0.609375, | |
| "nll_loss": 1.650390625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.859375, | |
| "rewards/margins": 7.1171875, | |
| "rewards/rejected": -3.25390625, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.34182908545727136, | |
| "grad_norm": 52.25, | |
| "learning_rate": 8.999196223698599e-06, | |
| "logits/chosen": -1.775390625, | |
| "logits/rejected": -1.724609375, | |
| "logps/chosen": -1095.25, | |
| "logps/rejected": -1240.0, | |
| "loss": 0.818359375, | |
| "nll_loss": 1.3671875, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 2.66796875, | |
| "rewards/margins": 4.54296875, | |
| "rewards/rejected": -1.875, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.3448275862068966, | |
| "grad_norm": 39.0, | |
| "learning_rate": 8.979793519437413e-06, | |
| "logits/chosen": -1.705078125, | |
| "logits/rejected": -1.693359375, | |
| "logps/chosen": -1506.0, | |
| "logps/rejected": -1627.0, | |
| "loss": 0.671875, | |
| "nll_loss": 1.873046875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.765625, | |
| "rewards/margins": 8.8671875, | |
| "rewards/rejected": -5.1015625, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.34782608695652173, | |
| "grad_norm": 39.0, | |
| "learning_rate": 8.960225877777095e-06, | |
| "logits/chosen": -1.650390625, | |
| "logits/rejected": -1.591796875, | |
| "logps/chosen": -1103.5, | |
| "logps/rejected": -1111.0, | |
| "loss": 0.62353515625, | |
| "nll_loss": 1.578125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.515625, | |
| "rewards/margins": 7.0234375, | |
| "rewards/rejected": -3.5078125, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.35082458770614694, | |
| "grad_norm": 15.375, | |
| "learning_rate": 8.940494109673266e-06, | |
| "logits/chosen": -1.650390625, | |
| "logits/rejected": -1.642578125, | |
| "logps/chosen": -637.25, | |
| "logps/rejected": -920.0, | |
| "loss": 0.611328125, | |
| "nll_loss": 1.068359375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.22265625, | |
| "rewards/margins": 6.828125, | |
| "rewards/rejected": -4.60546875, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.3538230884557721, | |
| "grad_norm": 51.0, | |
| "learning_rate": 8.920599032883553e-06, | |
| "logits/chosen": -1.76171875, | |
| "logits/rejected": -1.798828125, | |
| "logps/chosen": -1539.0, | |
| "logps/rejected": -1982.0, | |
| "loss": 0.673828125, | |
| "nll_loss": 1.96484375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.140625, | |
| "rewards/margins": 10.8203125, | |
| "rewards/rejected": -7.6796875, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.3568215892053973, | |
| "grad_norm": 31.875, | |
| "learning_rate": 8.900541471933703e-06, | |
| "logits/chosen": -1.7734375, | |
| "logits/rejected": -1.75390625, | |
| "logps/chosen": -1640.0, | |
| "logps/rejected": -1754.0, | |
| "loss": 0.685546875, | |
| "nll_loss": 2.126953125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.0390625, | |
| "rewards/margins": 9.140625, | |
| "rewards/rejected": -5.1015625, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.3598200899550225, | |
| "grad_norm": 23.875, | |
| "learning_rate": 8.880322258083408e-06, | |
| "logits/chosen": -1.841796875, | |
| "logits/rejected": -1.767578125, | |
| "logps/chosen": -918.0, | |
| "logps/rejected": -1204.5, | |
| "loss": 0.4716796875, | |
| "nll_loss": 1.4248046875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.10546875, | |
| "rewards/margins": 9.109375, | |
| "rewards/rejected": -6.0, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.36281859070464767, | |
| "grad_norm": 34.75, | |
| "learning_rate": 8.859942229291856e-06, | |
| "logits/chosen": -1.701171875, | |
| "logits/rejected": -1.654296875, | |
| "logps/chosen": -1332.375, | |
| "logps/rejected": -1418.5, | |
| "loss": 0.61279296875, | |
| "nll_loss": 1.563232421875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.984375, | |
| "rewards/margins": 8.2109375, | |
| "rewards/rejected": -4.21875, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.3658170914542729, | |
| "grad_norm": 56.5, | |
| "learning_rate": 8.839402230183e-06, | |
| "logits/chosen": -1.666015625, | |
| "logits/rejected": -1.634765625, | |
| "logps/chosen": -534.5, | |
| "logps/rejected": -806.0, | |
| "loss": 0.6767578125, | |
| "nll_loss": 1.126953125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.32421875, | |
| "rewards/margins": 6.4921875, | |
| "rewards/rejected": -4.1875, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.36881559220389803, | |
| "grad_norm": 56.5, | |
| "learning_rate": 8.818703112010562e-06, | |
| "logits/chosen": -1.52734375, | |
| "logits/rejected": -1.5390625, | |
| "logps/chosen": -1107.25, | |
| "logps/rejected": -1183.25, | |
| "loss": 0.6611328125, | |
| "nll_loss": 1.7822265625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.57421875, | |
| "rewards/margins": 7.765625, | |
| "rewards/rejected": -5.1875, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.37181409295352325, | |
| "grad_norm": 110.5, | |
| "learning_rate": 8.797845732622742e-06, | |
| "logits/chosen": -1.6796875, | |
| "logits/rejected": -1.693359375, | |
| "logps/chosen": -2488.0, | |
| "logps/rejected": -2250.0, | |
| "loss": 0.8564453125, | |
| "nll_loss": 2.548828125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.375, | |
| "rewards/margins": 9.875, | |
| "rewards/rejected": -6.5, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.3748125937031484, | |
| "grad_norm": 27.0, | |
| "learning_rate": 8.776830956426674e-06, | |
| "logits/chosen": -1.634765625, | |
| "logits/rejected": -1.693359375, | |
| "logps/chosen": -1075.0, | |
| "logps/rejected": -1270.0, | |
| "loss": 0.6455078125, | |
| "nll_loss": 1.830078125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.212890625, | |
| "rewards/margins": 7.96875, | |
| "rewards/rejected": -4.7578125, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.3778110944527736, | |
| "grad_norm": 26.375, | |
| "learning_rate": 8.755659654352599e-06, | |
| "logits/chosen": -1.705078125, | |
| "logits/rejected": -1.66796875, | |
| "logps/chosen": -1637.0, | |
| "logps/rejected": -1700.0, | |
| "loss": 0.57861328125, | |
| "nll_loss": 1.87890625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.88671875, | |
| "rewards/margins": 10.71875, | |
| "rewards/rejected": -6.828125, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.3808095952023988, | |
| "grad_norm": 36.0, | |
| "learning_rate": 8.734332703817771e-06, | |
| "logits/chosen": -1.677734375, | |
| "logits/rejected": -1.6875, | |
| "logps/chosen": -1456.0, | |
| "logps/rejected": -1410.0, | |
| "loss": 0.57421875, | |
| "nll_loss": 1.71875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.90625, | |
| "rewards/margins": 9.9375, | |
| "rewards/rejected": -6.03125, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.383808095952024, | |
| "grad_norm": 34.5, | |
| "learning_rate": 8.712850988690094e-06, | |
| "logits/chosen": -1.697265625, | |
| "logits/rejected": -1.724609375, | |
| "logps/chosen": -1388.0, | |
| "logps/rejected": -1622.0, | |
| "loss": 0.55322265625, | |
| "nll_loss": 1.80859375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.890625, | |
| "rewards/margins": 12.828125, | |
| "rewards/rejected": -6.9375, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.3868065967016492, | |
| "grad_norm": 46.5, | |
| "learning_rate": 8.691215399251489e-06, | |
| "logits/chosen": -1.74609375, | |
| "logits/rejected": -1.732421875, | |
| "logps/chosen": -1243.25, | |
| "logps/rejected": -1332.0, | |
| "loss": 0.67578125, | |
| "nll_loss": 1.51416015625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.66015625, | |
| "rewards/margins": 6.03125, | |
| "rewards/rejected": -3.375, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.38980509745127434, | |
| "grad_norm": 89.5, | |
| "learning_rate": 8.669426832160997e-06, | |
| "logits/chosen": -1.779296875, | |
| "logits/rejected": -1.744140625, | |
| "logps/chosen": -1364.0, | |
| "logps/rejected": -1410.0, | |
| "loss": 0.6494140625, | |
| "nll_loss": 1.65234375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.359375, | |
| "rewards/margins": 7.96875, | |
| "rewards/rejected": -3.6015625, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.39280359820089955, | |
| "grad_norm": 82.5, | |
| "learning_rate": 8.647486190417624e-06, | |
| "logits/chosen": -1.599609375, | |
| "logits/rejected": -1.63671875, | |
| "logps/chosen": -1187.5625, | |
| "logps/rejected": -1250.25, | |
| "loss": 0.7490234375, | |
| "nll_loss": 1.5283203125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.240234375, | |
| "rewards/margins": 7.0546875, | |
| "rewards/rejected": -2.8125, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.39580209895052476, | |
| "grad_norm": 53.25, | |
| "learning_rate": 8.625394383322914e-06, | |
| "logits/chosen": -1.677734375, | |
| "logits/rejected": -1.73046875, | |
| "logps/chosen": -1205.0, | |
| "logps/rejected": -1314.0, | |
| "loss": 0.8173828125, | |
| "nll_loss": 1.82421875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.3515625, | |
| "rewards/margins": 6.21875, | |
| "rewards/rejected": -1.8515625, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.3988005997001499, | |
| "grad_norm": 43.5, | |
| "learning_rate": 8.603152326443262e-06, | |
| "logits/chosen": -1.779296875, | |
| "logits/rejected": -1.80078125, | |
| "logps/chosen": -1489.0, | |
| "logps/rejected": -1477.0, | |
| "loss": 0.7080078125, | |
| "nll_loss": 1.990234375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.50390625, | |
| "rewards/margins": 7.46875, | |
| "rewards/rejected": -3.97265625, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.4017991004497751, | |
| "grad_norm": 74.0, | |
| "learning_rate": 8.580760941571968e-06, | |
| "logits/chosen": -1.84375, | |
| "logits/rejected": -1.875, | |
| "logps/chosen": -1790.0, | |
| "logps/rejected": -1952.0, | |
| "loss": 0.75, | |
| "nll_loss": 2.193359375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.390625, | |
| "rewards/margins": 8.3046875, | |
| "rewards/rejected": -4.9140625, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.4047976011994003, | |
| "grad_norm": 58.25, | |
| "learning_rate": 8.55822115669104e-06, | |
| "logits/chosen": -1.59765625, | |
| "logits/rejected": -1.734375, | |
| "logps/chosen": -1330.125, | |
| "logps/rejected": -1292.625, | |
| "loss": 0.77734375, | |
| "nll_loss": 1.89501953125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.947265625, | |
| "rewards/margins": 6.953125, | |
| "rewards/rejected": -5.015625, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.4077961019490255, | |
| "grad_norm": 23.125, | |
| "learning_rate": 8.535533905932739e-06, | |
| "logits/chosen": -1.751953125, | |
| "logits/rejected": -1.7578125, | |
| "logps/chosen": -918.0, | |
| "logps/rejected": -1206.0, | |
| "loss": 0.47705078125, | |
| "nll_loss": 1.18603515625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.515625, | |
| "rewards/margins": 9.53125, | |
| "rewards/rejected": -6.015625, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.4107946026986507, | |
| "grad_norm": 32.0, | |
| "learning_rate": 8.512700129540847e-06, | |
| "logits/chosen": -1.65234375, | |
| "logits/rejected": -1.6796875, | |
| "logps/chosen": -954.625, | |
| "logps/rejected": -899.875, | |
| "loss": 0.591552734375, | |
| "nll_loss": 1.60595703125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.78515625, | |
| "rewards/margins": 7.3359375, | |
| "rewards/rejected": -4.55078125, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.41379310344827586, | |
| "grad_norm": 74.5, | |
| "learning_rate": 8.489720773831717e-06, | |
| "logits/chosen": -1.7421875, | |
| "logits/rejected": -1.734375, | |
| "logps/chosen": -1874.0, | |
| "logps/rejected": -1946.0, | |
| "loss": 0.869140625, | |
| "nll_loss": 2.39453125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.1015625, | |
| "rewards/margins": 8.0625, | |
| "rewards/rejected": -2.953125, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.41679160419790107, | |
| "grad_norm": 28.875, | |
| "learning_rate": 8.466596791155055e-06, | |
| "logits/chosen": -1.66015625, | |
| "logits/rejected": -1.71484375, | |
| "logps/chosen": -1147.0, | |
| "logps/rejected": -1320.0, | |
| "loss": 0.697265625, | |
| "nll_loss": 1.8203125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.078125, | |
| "rewards/margins": 8.9375, | |
| "rewards/rejected": -5.859375, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.4197901049475262, | |
| "grad_norm": 91.0, | |
| "learning_rate": 8.443329139854434e-06, | |
| "logits/chosen": -1.673828125, | |
| "logits/rejected": -1.646484375, | |
| "logps/chosen": -1091.0, | |
| "logps/rejected": -1208.0, | |
| "loss": 0.505615234375, | |
| "nll_loss": 1.451171875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.078125, | |
| "rewards/margins": 11.875, | |
| "rewards/rejected": -6.78125, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.42278860569715143, | |
| "grad_norm": 47.5, | |
| "learning_rate": 8.419918784227592e-06, | |
| "logits/chosen": -1.767578125, | |
| "logits/rejected": -1.763671875, | |
| "logps/chosen": -1792.0, | |
| "logps/rejected": -1844.0, | |
| "loss": 0.724609375, | |
| "nll_loss": 2.212890625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.03125, | |
| "rewards/margins": 8.7421875, | |
| "rewards/rejected": -4.7109375, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.4257871064467766, | |
| "grad_norm": 42.25, | |
| "learning_rate": 8.396366694486466e-06, | |
| "logits/chosen": -1.599609375, | |
| "logits/rejected": -1.6015625, | |
| "logps/chosen": -1004.5, | |
| "logps/rejected": -1242.0, | |
| "loss": 0.5654296875, | |
| "nll_loss": 1.5791015625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.91796875, | |
| "rewards/margins": 10.09375, | |
| "rewards/rejected": -6.1796875, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.4287856071964018, | |
| "grad_norm": 26.5, | |
| "learning_rate": 8.372673846716977e-06, | |
| "logits/chosen": -1.80859375, | |
| "logits/rejected": -1.810546875, | |
| "logps/chosen": -1372.0, | |
| "logps/rejected": -1631.0, | |
| "loss": 0.625, | |
| "nll_loss": 1.96484375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.2109375, | |
| "rewards/margins": 10.15625, | |
| "rewards/rejected": -5.9453125, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.431784107946027, | |
| "grad_norm": 46.5, | |
| "learning_rate": 8.348841222838579e-06, | |
| "logits/chosen": -1.77734375, | |
| "logits/rejected": -1.7421875, | |
| "logps/chosen": -1485.0, | |
| "logps/rejected": -1534.0, | |
| "loss": 0.6240234375, | |
| "nll_loss": 1.837890625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.0703125, | |
| "rewards/margins": 8.9765625, | |
| "rewards/rejected": -4.90625, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.43478260869565216, | |
| "grad_norm": 43.75, | |
| "learning_rate": 8.324869810563573e-06, | |
| "logits/chosen": -1.716796875, | |
| "logits/rejected": -1.693359375, | |
| "logps/chosen": -1626.0, | |
| "logps/rejected": -1632.0, | |
| "loss": 0.6923828125, | |
| "nll_loss": 2.09765625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.52734375, | |
| "rewards/margins": 9.46875, | |
| "rewards/rejected": -4.9375, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.43778110944527737, | |
| "grad_norm": 65.5, | |
| "learning_rate": 8.30076060335616e-06, | |
| "logits/chosen": -1.732421875, | |
| "logits/rejected": -1.771484375, | |
| "logps/chosen": -1584.0, | |
| "logps/rejected": -1770.0, | |
| "loss": 0.6015625, | |
| "nll_loss": 1.904296875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.9296875, | |
| "rewards/margins": 10.6171875, | |
| "rewards/rejected": -6.6875, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.4407796101949025, | |
| "grad_norm": 27.5, | |
| "learning_rate": 8.276514600391272e-06, | |
| "logits/chosen": -1.646484375, | |
| "logits/rejected": -1.65625, | |
| "logps/chosen": -968.5, | |
| "logps/rejected": -1189.0, | |
| "loss": 0.5771484375, | |
| "nll_loss": 1.681640625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.62890625, | |
| "rewards/margins": 10.515625, | |
| "rewards/rejected": -5.89453125, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.44377811094452774, | |
| "grad_norm": 24.125, | |
| "learning_rate": 8.25213280651317e-06, | |
| "logits/chosen": -1.703125, | |
| "logits/rejected": -1.71484375, | |
| "logps/chosen": -1256.375, | |
| "logps/rejected": -1589.25, | |
| "loss": 0.5576171875, | |
| "nll_loss": 1.766845703125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.91015625, | |
| "rewards/margins": 9.640625, | |
| "rewards/rejected": -5.7265625, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.44677661169415295, | |
| "grad_norm": 65.5, | |
| "learning_rate": 8.227616232193794e-06, | |
| "logits/chosen": -1.78125, | |
| "logits/rejected": -1.7421875, | |
| "logps/chosen": -2288.0, | |
| "logps/rejected": -2264.0, | |
| "loss": 0.783203125, | |
| "nll_loss": 2.48828125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.859375, | |
| "rewards/margins": 10.71875, | |
| "rewards/rejected": -6.859375, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.4497751124437781, | |
| "grad_norm": 21.25, | |
| "learning_rate": 8.202965893490877e-06, | |
| "logits/chosen": -1.6640625, | |
| "logits/rejected": -1.689453125, | |
| "logps/chosen": -500.125, | |
| "logps/rejected": -871.0, | |
| "loss": 0.44189453125, | |
| "nll_loss": 1.038330078125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.8203125, | |
| "rewards/margins": 8.515625, | |
| "rewards/rejected": -5.6875, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.4527736131934033, | |
| "grad_norm": 22.75, | |
| "learning_rate": 8.178182812005853e-06, | |
| "logits/chosen": -1.599609375, | |
| "logits/rejected": -1.611328125, | |
| "logps/chosen": -722.28125, | |
| "logps/rejected": -792.0, | |
| "loss": 0.4716796875, | |
| "nll_loss": 1.11962890625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.11328125, | |
| "rewards/margins": 7.09375, | |
| "rewards/rejected": -3.984375, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.45577211394302847, | |
| "grad_norm": 49.75, | |
| "learning_rate": 8.153268014841507e-06, | |
| "logits/chosen": -1.673828125, | |
| "logits/rejected": -1.673828125, | |
| "logps/chosen": -1038.75, | |
| "logps/rejected": -1381.0, | |
| "loss": 0.568359375, | |
| "nll_loss": 1.6015625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.640625, | |
| "rewards/margins": 8.625, | |
| "rewards/rejected": -3.9921875, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.4587706146926537, | |
| "grad_norm": 66.0, | |
| "learning_rate": 8.128222534559406e-06, | |
| "logits/chosen": -1.611328125, | |
| "logits/rejected": -1.701171875, | |
| "logps/chosen": -1522.203125, | |
| "logps/rejected": -1810.75, | |
| "loss": 0.57080078125, | |
| "nll_loss": 1.6414794921875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.83984375, | |
| "rewards/margins": 8.53125, | |
| "rewards/rejected": -3.6875, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.4617691154422789, | |
| "grad_norm": 46.25, | |
| "learning_rate": 8.103047409137114e-06, | |
| "logits/chosen": -1.728515625, | |
| "logits/rejected": -1.7734375, | |
| "logps/chosen": -927.875, | |
| "logps/rejected": -1013.125, | |
| "loss": 0.59033203125, | |
| "nll_loss": 1.43798828125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.5703125, | |
| "rewards/margins": 7.875, | |
| "rewards/rejected": -4.3125, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.46476761619190404, | |
| "grad_norm": 31.5, | |
| "learning_rate": 8.07774368192517e-06, | |
| "logits/chosen": -1.708984375, | |
| "logits/rejected": -1.728515625, | |
| "logps/chosen": -1434.875, | |
| "logps/rejected": -1416.0, | |
| "loss": 0.615234375, | |
| "nll_loss": 1.69482421875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.833984375, | |
| "rewards/margins": 8.578125, | |
| "rewards/rejected": -4.7421875, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.46776611694152925, | |
| "grad_norm": 24.0, | |
| "learning_rate": 8.052312401603848e-06, | |
| "logits/chosen": -1.6328125, | |
| "logits/rejected": -1.671875, | |
| "logps/chosen": -1212.0, | |
| "logps/rejected": -1376.0, | |
| "loss": 0.4765625, | |
| "nll_loss": 1.5703125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.71875, | |
| "rewards/margins": 10.140625, | |
| "rewards/rejected": -5.421875, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.4707646176911544, | |
| "grad_norm": 41.5, | |
| "learning_rate": 8.026754622139691e-06, | |
| "logits/chosen": -1.7109375, | |
| "logits/rejected": -1.71875, | |
| "logps/chosen": -1130.0, | |
| "logps/rejected": -1293.0, | |
| "loss": 0.55859375, | |
| "nll_loss": 1.576171875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.4609375, | |
| "rewards/margins": 10.34375, | |
| "rewards/rejected": -6.8828125, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.4737631184407796, | |
| "grad_norm": 50.5, | |
| "learning_rate": 8.001071402741843e-06, | |
| "logits/chosen": -1.732421875, | |
| "logits/rejected": -1.703125, | |
| "logps/chosen": -862.0, | |
| "logps/rejected": -1245.0, | |
| "loss": 0.5537109375, | |
| "nll_loss": 1.369140625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.38671875, | |
| "rewards/margins": 8.34375, | |
| "rewards/rejected": -4.96875, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.4767616191904048, | |
| "grad_norm": 21.0, | |
| "learning_rate": 7.975263807818136e-06, | |
| "logits/chosen": -1.72265625, | |
| "logits/rejected": -1.69921875, | |
| "logps/chosen": -802.625, | |
| "logps/rejected": -869.75, | |
| "loss": 0.49462890625, | |
| "nll_loss": 1.310546875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.84765625, | |
| "rewards/margins": 7.1875, | |
| "rewards/rejected": -4.328125, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.47976011994003, | |
| "grad_norm": 57.0, | |
| "learning_rate": 7.949332906930995e-06, | |
| "logits/chosen": -1.603515625, | |
| "logits/rejected": -1.630859375, | |
| "logps/chosen": -857.0, | |
| "logps/rejected": -1061.0, | |
| "loss": 0.62939453125, | |
| "nll_loss": 1.65625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.265625, | |
| "rewards/margins": 7.6953125, | |
| "rewards/rejected": -3.4375, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.4827586206896552, | |
| "grad_norm": 45.25, | |
| "learning_rate": 7.923279774753092e-06, | |
| "logits/chosen": -1.73046875, | |
| "logits/rejected": -1.6796875, | |
| "logps/chosen": -1773.0, | |
| "logps/rejected": -1804.0, | |
| "loss": 0.8701171875, | |
| "nll_loss": 1.921875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.21484375, | |
| "rewards/margins": 8.25, | |
| "rewards/rejected": -5.03125, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.48575712143928035, | |
| "grad_norm": 32.0, | |
| "learning_rate": 7.897105491022819e-06, | |
| "logits/chosen": -1.64453125, | |
| "logits/rejected": -1.630859375, | |
| "logps/chosen": -1144.625, | |
| "logps/rejected": -1312.0, | |
| "loss": 0.4794921875, | |
| "nll_loss": 1.552734375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.29296875, | |
| "rewards/margins": 10.84375, | |
| "rewards/rejected": -6.5546875, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.48875562218890556, | |
| "grad_norm": 29.5, | |
| "learning_rate": 7.870811140499543e-06, | |
| "logits/chosen": -1.724609375, | |
| "logits/rejected": -1.64453125, | |
| "logps/chosen": -1252.0, | |
| "logps/rejected": -1311.0, | |
| "loss": 0.5830078125, | |
| "nll_loss": 1.84375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.7890625, | |
| "rewards/margins": 13.140625, | |
| "rewards/rejected": -9.3671875, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.4917541229385307, | |
| "grad_norm": 37.5, | |
| "learning_rate": 7.844397812918637e-06, | |
| "logits/chosen": -1.81640625, | |
| "logits/rejected": -1.787109375, | |
| "logps/chosen": -1378.0, | |
| "logps/rejected": -1546.0, | |
| "loss": 0.6435546875, | |
| "nll_loss": 1.900390625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.8671875, | |
| "rewards/margins": 7.921875, | |
| "rewards/rejected": -4.0546875, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.4947526236881559, | |
| "grad_norm": 33.0, | |
| "learning_rate": 7.817866602946326e-06, | |
| "logits/chosen": -1.654296875, | |
| "logits/rejected": -1.630859375, | |
| "logps/chosen": -870.5, | |
| "logps/rejected": -933.75, | |
| "loss": 0.5341796875, | |
| "nll_loss": 1.21484375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.765625, | |
| "rewards/margins": 7.3828125, | |
| "rewards/rejected": -3.6171875, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.49775112443778113, | |
| "grad_norm": 67.0, | |
| "learning_rate": 7.791218610134324e-06, | |
| "logits/chosen": -1.751953125, | |
| "logits/rejected": -1.76953125, | |
| "logps/chosen": -1784.0, | |
| "logps/rejected": -1921.0, | |
| "loss": 0.623046875, | |
| "nll_loss": 1.955078125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.10546875, | |
| "rewards/margins": 9.359375, | |
| "rewards/rejected": -4.25, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.5007496251874063, | |
| "grad_norm": 73.0, | |
| "learning_rate": 7.764454938874252e-06, | |
| "logits/chosen": -1.78125, | |
| "logits/rejected": -1.853515625, | |
| "logps/chosen": -1023.0, | |
| "logps/rejected": -1186.5, | |
| "loss": 0.6767578125, | |
| "nll_loss": 1.677734375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.92578125, | |
| "rewards/margins": 7.4296875, | |
| "rewards/rejected": -3.5, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.5037481259370314, | |
| "grad_norm": 15.0625, | |
| "learning_rate": 7.737576698351878e-06, | |
| "logits/chosen": -1.83984375, | |
| "logits/rejected": -1.79296875, | |
| "logps/chosen": -2036.0, | |
| "logps/rejected": -2070.0, | |
| "loss": 0.5791015625, | |
| "nll_loss": 2.158203125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.125, | |
| "rewards/margins": 11.875, | |
| "rewards/rejected": -6.75, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.5067466266866567, | |
| "grad_norm": 15.4375, | |
| "learning_rate": 7.710585002501145e-06, | |
| "logits/chosen": -1.70703125, | |
| "logits/rejected": -1.71484375, | |
| "logps/chosen": -807.125, | |
| "logps/rejected": -880.5, | |
| "loss": 0.525390625, | |
| "nll_loss": 1.0146484375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.58203125, | |
| "rewards/margins": 7.171875, | |
| "rewards/rejected": -4.5859375, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.5097451274362819, | |
| "grad_norm": 45.5, | |
| "learning_rate": 7.683480969958005e-06, | |
| "logits/chosen": -1.740234375, | |
| "logits/rejected": -1.728515625, | |
| "logps/chosen": -1052.0, | |
| "logps/rejected": -1072.0, | |
| "loss": 0.609375, | |
| "nll_loss": 1.5546875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.140625, | |
| "rewards/margins": 8.421875, | |
| "rewards/rejected": -6.28125, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.512743628185907, | |
| "grad_norm": 17.625, | |
| "learning_rate": 7.656265724014054e-06, | |
| "logits/chosen": -1.615234375, | |
| "logits/rejected": -1.611328125, | |
| "logps/chosen": -435.5, | |
| "logps/rejected": -552.125, | |
| "loss": 0.466796875, | |
| "nll_loss": 0.8681640625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.078125, | |
| "rewards/margins": 6.640625, | |
| "rewards/rejected": -4.5625, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.5157421289355323, | |
| "grad_norm": 55.5, | |
| "learning_rate": 7.628940392569995e-06, | |
| "logits/chosen": -1.654296875, | |
| "logits/rejected": -1.662109375, | |
| "logps/chosen": -784.25, | |
| "logps/rejected": -1066.0, | |
| "loss": 0.6005859375, | |
| "nll_loss": 1.3203125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.77734375, | |
| "rewards/margins": 8.5, | |
| "rewards/rejected": -3.7265625, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.5187406296851574, | |
| "grad_norm": 70.5, | |
| "learning_rate": 7.601506108088874e-06, | |
| "logits/chosen": -1.79296875, | |
| "logits/rejected": -1.8046875, | |
| "logps/chosen": -1818.0, | |
| "logps/rejected": -1958.0, | |
| "loss": 0.59619140625, | |
| "nll_loss": 1.869140625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.65625, | |
| "rewards/margins": 9.71875, | |
| "rewards/rejected": -4.0625, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.5217391304347826, | |
| "grad_norm": 35.75, | |
| "learning_rate": 7.5739640075491546e-06, | |
| "logits/chosen": -1.806640625, | |
| "logits/rejected": -1.720703125, | |
| "logps/chosen": -1321.0, | |
| "logps/rejected": -1439.5, | |
| "loss": 0.62744140625, | |
| "nll_loss": 1.97265625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.1015625, | |
| "rewards/margins": 9.0703125, | |
| "rewards/rejected": -4.96875, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.5247376311844077, | |
| "grad_norm": 28.0, | |
| "learning_rate": 7.546315232397601e-06, | |
| "logits/chosen": -1.59375, | |
| "logits/rejected": -1.69921875, | |
| "logps/chosen": -1306.0, | |
| "logps/rejected": -1536.0, | |
| "loss": 0.5693359375, | |
| "nll_loss": 1.873046875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.2890625, | |
| "rewards/margins": 10.40625, | |
| "rewards/rejected": -6.125, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.527736131934033, | |
| "grad_norm": 52.75, | |
| "learning_rate": 7.518560928501969e-06, | |
| "logits/chosen": -1.720703125, | |
| "logits/rejected": -1.740234375, | |
| "logps/chosen": -1345.0, | |
| "logps/rejected": -1432.0, | |
| "loss": 0.65283203125, | |
| "nll_loss": 1.833984375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.103515625, | |
| "rewards/margins": 12.609375, | |
| "rewards/rejected": -8.51171875, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.5307346326836582, | |
| "grad_norm": 40.0, | |
| "learning_rate": 7.4907022461035125e-06, | |
| "logits/chosen": -1.638671875, | |
| "logits/rejected": -1.63671875, | |
| "logps/chosen": -1102.25, | |
| "logps/rejected": -1063.75, | |
| "loss": 0.5537109375, | |
| "nll_loss": 1.18505859375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.08984375, | |
| "rewards/margins": 10.4921875, | |
| "rewards/rejected": -7.40625, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.5337331334332833, | |
| "grad_norm": 32.5, | |
| "learning_rate": 7.462740339769323e-06, | |
| "logits/chosen": -1.69140625, | |
| "logits/rejected": -1.744140625, | |
| "logps/chosen": -1708.0, | |
| "logps/rejected": -1704.0, | |
| "loss": 0.6611328125, | |
| "nll_loss": 2.015625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.4375, | |
| "rewards/margins": 9.734375, | |
| "rewards/rejected": -5.296875, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.5367316341829086, | |
| "grad_norm": 48.0, | |
| "learning_rate": 7.434676368344469e-06, | |
| "logits/chosen": -1.783203125, | |
| "logits/rejected": -1.75390625, | |
| "logps/chosen": -1914.0, | |
| "logps/rejected": -1940.0, | |
| "loss": 0.626953125, | |
| "nll_loss": 2.044921875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.609375, | |
| "rewards/margins": 11.484375, | |
| "rewards/rejected": -6.875, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.5397301349325337, | |
| "grad_norm": 7.40625, | |
| "learning_rate": 7.406511494903982e-06, | |
| "logits/chosen": -1.61328125, | |
| "logits/rejected": -1.55859375, | |
| "logps/chosen": -1046.0, | |
| "logps/rejected": -1124.0, | |
| "loss": 0.498291015625, | |
| "nll_loss": 1.625244140625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.203125, | |
| "rewards/margins": 12.671875, | |
| "rewards/rejected": -8.484375, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.5427286356821589, | |
| "grad_norm": 21.75, | |
| "learning_rate": 7.378246886704638e-06, | |
| "logits/chosen": -1.630859375, | |
| "logits/rejected": -1.66796875, | |
| "logps/chosen": -1207.0, | |
| "logps/rejected": -1420.0, | |
| "loss": 0.54052734375, | |
| "nll_loss": 1.6796875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.90625, | |
| "rewards/margins": 9.5, | |
| "rewards/rejected": -5.5859375, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.545727136431784, | |
| "grad_norm": 36.25, | |
| "learning_rate": 7.349883715136601e-06, | |
| "logits/chosen": -1.59765625, | |
| "logits/rejected": -1.599609375, | |
| "logps/chosen": -1302.0, | |
| "logps/rejected": -1336.0, | |
| "loss": 0.6962890625, | |
| "nll_loss": 1.96875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.068359375, | |
| "rewards/margins": 9.5546875, | |
| "rewards/rejected": -6.484375, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.5487256371814093, | |
| "grad_norm": 11.3125, | |
| "learning_rate": 7.321423155674858e-06, | |
| "logits/chosen": -1.79296875, | |
| "logits/rejected": -1.779296875, | |
| "logps/chosen": -1059.0, | |
| "logps/rejected": -1222.0, | |
| "loss": 0.59765625, | |
| "nll_loss": 1.802734375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.62890625, | |
| "rewards/margins": 8.546875, | |
| "rewards/rejected": -4.90625, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.5517241379310345, | |
| "grad_norm": 46.25, | |
| "learning_rate": 7.292866387830515e-06, | |
| "logits/chosen": -1.693359375, | |
| "logits/rejected": -1.6015625, | |
| "logps/chosen": -1292.375, | |
| "logps/rejected": -1353.5625, | |
| "loss": 0.67578125, | |
| "nll_loss": 1.42626953125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.45703125, | |
| "rewards/margins": 6.015625, | |
| "rewards/rejected": -3.55859375, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.5547226386806596, | |
| "grad_norm": 15.0625, | |
| "learning_rate": 7.264214595101913e-06, | |
| "logits/chosen": -1.61328125, | |
| "logits/rejected": -1.615234375, | |
| "logps/chosen": -1174.125, | |
| "logps/rejected": -1231.0, | |
| "loss": 0.4501953125, | |
| "nll_loss": 1.28564453125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.5625, | |
| "rewards/margins": 11.4375, | |
| "rewards/rejected": -7.8671875, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.5577211394302849, | |
| "grad_norm": 28.25, | |
| "learning_rate": 7.235468964925571e-06, | |
| "logits/chosen": -1.79296875, | |
| "logits/rejected": -1.794921875, | |
| "logps/chosen": -761.0625, | |
| "logps/rejected": -856.5625, | |
| "loss": 0.51318359375, | |
| "nll_loss": 1.46435546875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.75, | |
| "rewards/margins": 8.6953125, | |
| "rewards/rejected": -4.953125, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.56071964017991, | |
| "grad_norm": 29.875, | |
| "learning_rate": 7.206630688626981e-06, | |
| "logits/chosen": -1.529296875, | |
| "logits/rejected": -1.591796875, | |
| "logps/chosen": -1120.25, | |
| "logps/rejected": -1366.0, | |
| "loss": 0.489013671875, | |
| "nll_loss": 1.61376953125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.9453125, | |
| "rewards/margins": 12.796875, | |
| "rewards/rejected": -8.859375, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.5637181409295352, | |
| "grad_norm": 8.5625, | |
| "learning_rate": 7.177700961371239e-06, | |
| "logits/chosen": -1.673828125, | |
| "logits/rejected": -1.673828125, | |
| "logps/chosen": -423.8125, | |
| "logps/rejected": -450.5, | |
| "loss": 0.392578125, | |
| "nll_loss": 0.90478515625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.7578125, | |
| "rewards/margins": 9.1640625, | |
| "rewards/rejected": -6.3984375, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.5667166416791605, | |
| "grad_norm": 47.75, | |
| "learning_rate": 7.148680982113502e-06, | |
| "logits/chosen": -1.685546875, | |
| "logits/rejected": -1.720703125, | |
| "logps/chosen": -1484.0, | |
| "logps/rejected": -1674.0, | |
| "loss": 0.56640625, | |
| "nll_loss": 1.900390625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.8828125, | |
| "rewards/margins": 14.34375, | |
| "rewards/rejected": -7.453125, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.5697151424287856, | |
| "grad_norm": 72.0, | |
| "learning_rate": 7.119571953549305e-06, | |
| "logits/chosen": -1.677734375, | |
| "logits/rejected": -1.6953125, | |
| "logps/chosen": -1128.1875, | |
| "logps/rejected": -1395.5, | |
| "loss": 0.5537109375, | |
| "nll_loss": 1.513671875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.984375, | |
| "rewards/margins": 10.75, | |
| "rewards/rejected": -5.765625, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.5727136431784108, | |
| "grad_norm": 45.75, | |
| "learning_rate": 7.0903750820647175e-06, | |
| "logits/chosen": -1.78515625, | |
| "logits/rejected": -1.708984375, | |
| "logps/chosen": -858.75, | |
| "logps/rejected": -956.5, | |
| "loss": 0.50341796875, | |
| "nll_loss": 1.38330078125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.9375, | |
| "rewards/margins": 8.140625, | |
| "rewards/rejected": -4.203125, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.5757121439280359, | |
| "grad_norm": 22.75, | |
| "learning_rate": 7.061091577686349e-06, | |
| "logits/chosen": -1.759765625, | |
| "logits/rejected": -1.8046875, | |
| "logps/chosen": -1055.5, | |
| "logps/rejected": -1149.5, | |
| "loss": 0.58642578125, | |
| "nll_loss": 1.42431640625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.828125, | |
| "rewards/margins": 9.125, | |
| "rewards/rejected": -5.296875, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.5787106446776612, | |
| "grad_norm": 75.5, | |
| "learning_rate": 7.031722654031192e-06, | |
| "logits/chosen": -1.775390625, | |
| "logits/rejected": -1.751953125, | |
| "logps/chosen": -1746.0, | |
| "logps/rejected": -1844.0, | |
| "loss": 0.77734375, | |
| "nll_loss": 2.291015625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.80859375, | |
| "rewards/margins": 9.703125, | |
| "rewards/rejected": -3.8984375, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.5817091454272864, | |
| "grad_norm": 21.875, | |
| "learning_rate": 7.002269528256334e-06, | |
| "logits/chosen": -1.662109375, | |
| "logits/rejected": -1.658203125, | |
| "logps/chosen": -1760.0, | |
| "logps/rejected": -1872.0, | |
| "loss": 0.5302734375, | |
| "nll_loss": 1.912109375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.296875, | |
| "rewards/margins": 17.09375, | |
| "rewards/rejected": -11.78125, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.5847076461769115, | |
| "grad_norm": 35.0, | |
| "learning_rate": 6.972733421008505e-06, | |
| "logits/chosen": -1.7109375, | |
| "logits/rejected": -1.70703125, | |
| "logps/chosen": -1538.0, | |
| "logps/rejected": -1708.0, | |
| "loss": 0.7744140625, | |
| "nll_loss": 2.29296875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.828125, | |
| "rewards/margins": 9.140625, | |
| "rewards/rejected": -5.3203125, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.5877061469265368, | |
| "grad_norm": 22.625, | |
| "learning_rate": 6.943115556373503e-06, | |
| "logits/chosen": -1.63671875, | |
| "logits/rejected": -1.650390625, | |
| "logps/chosen": -1166.0, | |
| "logps/rejected": -1293.0, | |
| "loss": 0.470703125, | |
| "nll_loss": 1.443359375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.28125, | |
| "rewards/margins": 10.578125, | |
| "rewards/rejected": -6.296875, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.5907046476761619, | |
| "grad_norm": 18.75, | |
| "learning_rate": 6.913417161825449e-06, | |
| "logits/chosen": -1.685546875, | |
| "logits/rejected": -1.65625, | |
| "logps/chosen": -1334.0, | |
| "logps/rejected": -1528.0, | |
| "loss": 0.591796875, | |
| "nll_loss": 1.921875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.296875, | |
| "rewards/margins": 11.0390625, | |
| "rewards/rejected": -6.7421875, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.5937031484257871, | |
| "grad_norm": 49.0, | |
| "learning_rate": 6.883639468175926e-06, | |
| "logits/chosen": -1.66796875, | |
| "logits/rejected": -1.650390625, | |
| "logps/chosen": -1457.75, | |
| "logps/rejected": -1496.0, | |
| "loss": 0.662109375, | |
| "nll_loss": 1.81884765625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.33203125, | |
| "rewards/margins": 11.1796875, | |
| "rewards/rejected": -7.8828125, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.5967016491754122, | |
| "grad_norm": 40.5, | |
| "learning_rate": 6.853783709522963e-06, | |
| "logits/chosen": -1.57421875, | |
| "logits/rejected": -1.623046875, | |
| "logps/chosen": -1197.0, | |
| "logps/rejected": -1352.0, | |
| "loss": 0.7451171875, | |
| "nll_loss": 1.91015625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.9375, | |
| "rewards/margins": 7.1171875, | |
| "rewards/rejected": -4.1796875, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.5997001499250375, | |
| "grad_norm": 16.875, | |
| "learning_rate": 6.823851123199894e-06, | |
| "logits/chosen": -1.716796875, | |
| "logits/rejected": -1.69140625, | |
| "logps/chosen": -1744.0, | |
| "logps/rejected": -1844.0, | |
| "loss": 0.56640625, | |
| "nll_loss": 2.04296875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.82421875, | |
| "rewards/margins": 14.609375, | |
| "rewards/rejected": -9.78125, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.6026986506746627, | |
| "grad_norm": 14.75, | |
| "learning_rate": 6.793842949724074e-06, | |
| "logits/chosen": -1.6875, | |
| "logits/rejected": -1.69921875, | |
| "logps/chosen": -1252.15625, | |
| "logps/rejected": -1181.1875, | |
| "loss": 0.5322265625, | |
| "nll_loss": 1.7593994140625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.5, | |
| "rewards/margins": 10.671875, | |
| "rewards/rejected": -7.1640625, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.6056971514242878, | |
| "grad_norm": 33.75, | |
| "learning_rate": 6.763760432745475e-06, | |
| "logits/chosen": -1.76171875, | |
| "logits/rejected": -1.818359375, | |
| "logps/chosen": -1228.71875, | |
| "logps/rejected": -1509.5, | |
| "loss": 0.57421875, | |
| "nll_loss": 1.74951171875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.51953125, | |
| "rewards/margins": 10.640625, | |
| "rewards/rejected": -6.1171875, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.6086956521739131, | |
| "grad_norm": 23.375, | |
| "learning_rate": 6.733604818995133e-06, | |
| "logits/chosen": -1.7109375, | |
| "logits/rejected": -1.609375, | |
| "logps/chosen": -875.078125, | |
| "logps/rejected": -1057.0, | |
| "loss": 0.46142578125, | |
| "nll_loss": 1.115234375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.4140625, | |
| "rewards/margins": 9.9765625, | |
| "rewards/rejected": -5.5546875, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.6116941529235382, | |
| "grad_norm": 58.0, | |
| "learning_rate": 6.703377358233489e-06, | |
| "logits/chosen": -1.68359375, | |
| "logits/rejected": -1.712890625, | |
| "logps/chosen": -1729.0, | |
| "logps/rejected": -1954.0, | |
| "loss": 0.62890625, | |
| "nll_loss": 1.9140625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.359375, | |
| "rewards/margins": 9.921875, | |
| "rewards/rejected": -6.5625, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.6146926536731634, | |
| "grad_norm": 62.5, | |
| "learning_rate": 6.673079303198591e-06, | |
| "logits/chosen": -1.759765625, | |
| "logits/rejected": -1.751953125, | |
| "logps/chosen": -1221.0, | |
| "logps/rejected": -1140.0, | |
| "loss": 0.61376953125, | |
| "nll_loss": 1.833984375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.6484375, | |
| "rewards/margins": 10.375, | |
| "rewards/rejected": -6.734375, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.6176911544227887, | |
| "grad_norm": 8.75, | |
| "learning_rate": 6.6427119095541745e-06, | |
| "logits/chosen": -1.783203125, | |
| "logits/rejected": -1.7734375, | |
| "logps/chosen": -1370.0, | |
| "logps/rejected": -1515.0, | |
| "loss": 0.56201171875, | |
| "nll_loss": 1.927734375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.203125, | |
| "rewards/margins": 11.234375, | |
| "rewards/rejected": -6.046875, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.6206896551724138, | |
| "grad_norm": 33.0, | |
| "learning_rate": 6.612276435837622e-06, | |
| "logits/chosen": -1.783203125, | |
| "logits/rejected": -1.75390625, | |
| "logps/chosen": -1521.0, | |
| "logps/rejected": -1367.0, | |
| "loss": 0.595703125, | |
| "nll_loss": 2.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.0625, | |
| "rewards/margins": 9.640625, | |
| "rewards/rejected": -4.578125, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.623688155922039, | |
| "grad_norm": 38.0, | |
| "learning_rate": 6.58177414340781e-06, | |
| "logits/chosen": -1.65625, | |
| "logits/rejected": -1.638671875, | |
| "logps/chosen": -1010.5, | |
| "logps/rejected": -1075.75, | |
| "loss": 0.405029296875, | |
| "nll_loss": 1.109619140625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.84375, | |
| "rewards/margins": 11.75, | |
| "rewards/rejected": -7.890625, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.6266866566716641, | |
| "grad_norm": 50.75, | |
| "learning_rate": 6.551206296392827e-06, | |
| "logits/chosen": -1.697265625, | |
| "logits/rejected": -1.7265625, | |
| "logps/chosen": -1423.0, | |
| "logps/rejected": -1568.0, | |
| "loss": 0.5517578125, | |
| "nll_loss": 1.68359375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.03125, | |
| "rewards/margins": 11.265625, | |
| "rewards/rejected": -6.234375, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.6296851574212894, | |
| "grad_norm": 52.25, | |
| "learning_rate": 6.520574161637591e-06, | |
| "logits/chosen": -1.728515625, | |
| "logits/rejected": -1.7265625, | |
| "logps/chosen": -1304.75, | |
| "logps/rejected": -1336.625, | |
| "loss": 0.55419921875, | |
| "nll_loss": 1.566162109375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.375, | |
| "rewards/margins": 9.59375, | |
| "rewards/rejected": -5.2109375, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.6326836581709145, | |
| "grad_norm": 54.25, | |
| "learning_rate": 6.4898790086513366e-06, | |
| "logits/chosen": -1.63671875, | |
| "logits/rejected": -1.66796875, | |
| "logps/chosen": -813.03125, | |
| "logps/rejected": -885.25, | |
| "loss": 0.56689453125, | |
| "nll_loss": 1.57763671875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.64453125, | |
| "rewards/margins": 8.28125, | |
| "rewards/rejected": -4.625, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.6356821589205397, | |
| "grad_norm": 33.0, | |
| "learning_rate": 6.459122109555011e-06, | |
| "logits/chosen": -1.734375, | |
| "logits/rejected": -1.67578125, | |
| "logps/chosen": -1351.125, | |
| "logps/rejected": -1525.0, | |
| "loss": 0.625, | |
| "nll_loss": 1.5732421875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.296875, | |
| "rewards/margins": 8.03125, | |
| "rewards/rejected": -4.73046875, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.638680659670165, | |
| "grad_norm": 45.25, | |
| "learning_rate": 6.42830473902855e-06, | |
| "logits/chosen": -1.771484375, | |
| "logits/rejected": -1.740234375, | |
| "logps/chosen": -1980.0, | |
| "logps/rejected": -2148.0, | |
| "loss": 0.6806640625, | |
| "nll_loss": 2.06640625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.75, | |
| "rewards/margins": 11.125, | |
| "rewards/rejected": -6.375, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.6416791604197901, | |
| "grad_norm": 26.75, | |
| "learning_rate": 6.397428174258048e-06, | |
| "logits/chosen": -1.681640625, | |
| "logits/rejected": -1.775390625, | |
| "logps/chosen": -1752.0, | |
| "logps/rejected": -1716.0, | |
| "loss": 0.6748046875, | |
| "nll_loss": 2.41796875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.921875, | |
| "rewards/margins": 10.828125, | |
| "rewards/rejected": -5.90625, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.6446776611694153, | |
| "grad_norm": 95.0, | |
| "learning_rate": 6.3664936948828296e-06, | |
| "logits/chosen": -1.806640625, | |
| "logits/rejected": -1.8125, | |
| "logps/chosen": -1670.0, | |
| "logps/rejected": -1818.0, | |
| "loss": 0.6787109375, | |
| "nll_loss": 1.974609375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.85546875, | |
| "rewards/margins": 10.8828125, | |
| "rewards/rejected": -8.03125, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.6476761619190404, | |
| "grad_norm": 12.75, | |
| "learning_rate": 6.335502582942409e-06, | |
| "logits/chosen": -1.712890625, | |
| "logits/rejected": -1.685546875, | |
| "logps/chosen": -991.0, | |
| "logps/rejected": -1086.0, | |
| "loss": 0.44482421875, | |
| "nll_loss": 1.37890625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.5625, | |
| "rewards/margins": 13.9375, | |
| "rewards/rejected": -8.390625, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.6506746626686657, | |
| "grad_norm": 32.5, | |
| "learning_rate": 6.304456122823377e-06, | |
| "logits/chosen": -1.841796875, | |
| "logits/rejected": -1.802734375, | |
| "logps/chosen": -1886.0, | |
| "logps/rejected": -2042.0, | |
| "loss": 0.6201171875, | |
| "nll_loss": 2.13671875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.75, | |
| "rewards/margins": 10.859375, | |
| "rewards/rejected": -5.109375, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.6536731634182908, | |
| "grad_norm": 9.5, | |
| "learning_rate": 6.273355601206143e-06, | |
| "logits/chosen": -1.8046875, | |
| "logits/rejected": -1.783203125, | |
| "logps/chosen": -834.5, | |
| "logps/rejected": -1077.5, | |
| "loss": 0.545654296875, | |
| "nll_loss": 1.71435546875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.46875, | |
| "rewards/margins": 10.46875, | |
| "rewards/rejected": -6.015625, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.656671664167916, | |
| "grad_norm": 33.5, | |
| "learning_rate": 6.24220230701164e-06, | |
| "logits/chosen": -1.83984375, | |
| "logits/rejected": -1.80078125, | |
| "logps/chosen": -1300.0, | |
| "logps/rejected": -1638.0, | |
| "loss": 0.58984375, | |
| "nll_loss": 1.958984375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.875, | |
| "rewards/margins": 12.28125, | |
| "rewards/rejected": -6.40625, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.6596701649175413, | |
| "grad_norm": 38.5, | |
| "learning_rate": 6.210997531347879e-06, | |
| "logits/chosen": -1.685546875, | |
| "logits/rejected": -1.69921875, | |
| "logps/chosen": -1680.0, | |
| "logps/rejected": -1700.0, | |
| "loss": 0.5849609375, | |
| "nll_loss": 1.943359375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.421875, | |
| "rewards/margins": 14.34375, | |
| "rewards/rejected": -8.921875, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.6626686656671664, | |
| "grad_norm": 32.25, | |
| "learning_rate": 6.179742567456464e-06, | |
| "logits/chosen": -1.583984375, | |
| "logits/rejected": -1.58203125, | |
| "logps/chosen": -879.25, | |
| "logps/rejected": -962.5, | |
| "loss": 0.52294921875, | |
| "nll_loss": 1.542236328125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.0234375, | |
| "rewards/margins": 9.5390625, | |
| "rewards/rejected": -6.515625, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.6656671664167916, | |
| "grad_norm": 43.75, | |
| "learning_rate": 6.148438710658979e-06, | |
| "logits/chosen": -1.759765625, | |
| "logits/rejected": -1.759765625, | |
| "logps/chosen": -1223.0, | |
| "logps/rejected": -1305.75, | |
| "loss": 0.61279296875, | |
| "nll_loss": 1.720703125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.8828125, | |
| "rewards/margins": 9.3203125, | |
| "rewards/rejected": -5.4453125, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.6686656671664168, | |
| "grad_norm": 65.0, | |
| "learning_rate": 6.117087258303314e-06, | |
| "logits/chosen": -1.79296875, | |
| "logits/rejected": -1.755859375, | |
| "logps/chosen": -1077.0, | |
| "logps/rejected": -1361.0, | |
| "loss": 0.68310546875, | |
| "nll_loss": 1.5771484375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.59375, | |
| "rewards/margins": 8.921875, | |
| "rewards/rejected": -5.328125, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.671664167916042, | |
| "grad_norm": 44.75, | |
| "learning_rate": 6.085689509709893e-06, | |
| "logits/chosen": -1.837890625, | |
| "logits/rejected": -1.791015625, | |
| "logps/chosen": -1747.0, | |
| "logps/rejected": -1970.0, | |
| "loss": 0.59716796875, | |
| "nll_loss": 2.078125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.609375, | |
| "rewards/margins": 12.265625, | |
| "rewards/rejected": -7.65625, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.6746626686656672, | |
| "grad_norm": 31.125, | |
| "learning_rate": 6.0542467661178325e-06, | |
| "logits/chosen": -1.630859375, | |
| "logits/rejected": -1.771484375, | |
| "logps/chosen": -1452.0, | |
| "logps/rejected": -1715.0, | |
| "loss": 0.5498046875, | |
| "nll_loss": 1.876953125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.2890625, | |
| "rewards/margins": 11.5, | |
| "rewards/rejected": -6.203125, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.6776611694152923, | |
| "grad_norm": 49.75, | |
| "learning_rate": 6.022760330631006e-06, | |
| "logits/chosen": -1.693359375, | |
| "logits/rejected": -1.6484375, | |
| "logps/chosen": -782.25, | |
| "logps/rejected": -844.75, | |
| "loss": 0.5302734375, | |
| "nll_loss": 1.40380859375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.888671875, | |
| "rewards/margins": 9.3828125, | |
| "rewards/rejected": -5.484375, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.6806596701649176, | |
| "grad_norm": 10.1875, | |
| "learning_rate": 5.991231508164037e-06, | |
| "logits/chosen": -1.751953125, | |
| "logits/rejected": -1.701171875, | |
| "logps/chosen": -1448.0, | |
| "logps/rejected": -1553.0, | |
| "loss": 0.689453125, | |
| "nll_loss": 2.484375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.95703125, | |
| "rewards/margins": 10.9375, | |
| "rewards/rejected": -6.0, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.6836581709145427, | |
| "grad_norm": 30.25, | |
| "learning_rate": 5.959661605388229e-06, | |
| "logits/chosen": -1.77734375, | |
| "logits/rejected": -1.75, | |
| "logps/chosen": -1412.0, | |
| "logps/rejected": -1804.0, | |
| "loss": 0.5830078125, | |
| "nll_loss": 1.7890625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.83984375, | |
| "rewards/margins": 12.15625, | |
| "rewards/rejected": -7.3125, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.6866566716641679, | |
| "grad_norm": 24.5, | |
| "learning_rate": 5.928051930677404e-06, | |
| "logits/chosen": -1.6953125, | |
| "logits/rejected": -1.6640625, | |
| "logps/chosen": -1169.25, | |
| "logps/rejected": -1307.125, | |
| "loss": 0.5185546875, | |
| "nll_loss": 1.71630859375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.76953125, | |
| "rewards/margins": 13.53125, | |
| "rewards/rejected": -8.7890625, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.6896551724137931, | |
| "grad_norm": 28.25, | |
| "learning_rate": 5.896403794053679e-06, | |
| "logits/chosen": -1.7109375, | |
| "logits/rejected": -1.802734375, | |
| "logps/chosen": -1612.0, | |
| "logps/rejected": -1782.0, | |
| "loss": 0.626953125, | |
| "nll_loss": 2.037109375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.171875, | |
| "rewards/margins": 14.265625, | |
| "rewards/rejected": -9.09375, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.6926536731634183, | |
| "grad_norm": 33.75, | |
| "learning_rate": 5.864718507133176e-06, | |
| "logits/chosen": -1.818359375, | |
| "logits/rejected": -1.8515625, | |
| "logps/chosen": -812.0, | |
| "logps/rejected": -1038.0, | |
| "loss": 0.642578125, | |
| "nll_loss": 1.931640625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.5703125, | |
| "rewards/margins": 8.421875, | |
| "rewards/rejected": -4.84375, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.6956521739130435, | |
| "grad_norm": 22.5, | |
| "learning_rate": 5.83299738307166e-06, | |
| "logits/chosen": -1.5703125, | |
| "logits/rejected": -1.6640625, | |
| "logps/chosen": -871.484375, | |
| "logps/rejected": -1067.5, | |
| "loss": 0.63232421875, | |
| "nll_loss": 1.5755615234375, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 3.734375, | |
| "rewards/margins": 8.78125, | |
| "rewards/rejected": -5.0390625, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.6986506746626686, | |
| "grad_norm": 19.0, | |
| "learning_rate": 5.801241736510128e-06, | |
| "logits/chosen": -1.720703125, | |
| "logits/rejected": -1.76953125, | |
| "logps/chosen": -1410.0, | |
| "logps/rejected": -1494.0, | |
| "loss": 0.7099609375, | |
| "nll_loss": 2.193359375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.484375, | |
| "rewards/margins": 11.015625, | |
| "rewards/rejected": -6.53125, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.7016491754122939, | |
| "grad_norm": 47.0, | |
| "learning_rate": 5.76945288352031e-06, | |
| "logits/chosen": -1.763671875, | |
| "logits/rejected": -1.798828125, | |
| "logps/chosen": -1369.0, | |
| "logps/rejected": -1628.0, | |
| "loss": 0.57958984375, | |
| "nll_loss": 1.720703125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.875, | |
| "rewards/margins": 10.578125, | |
| "rewards/rejected": -6.703125, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.704647676161919, | |
| "grad_norm": 12.5625, | |
| "learning_rate": 5.7376321415501356e-06, | |
| "logits/chosen": -1.826171875, | |
| "logits/rejected": -1.80859375, | |
| "logps/chosen": -1571.0, | |
| "logps/rejected": -1778.0, | |
| "loss": 0.53076171875, | |
| "nll_loss": 1.9375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.6171875, | |
| "rewards/margins": 10.421875, | |
| "rewards/rejected": -5.8046875, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.7076461769115442, | |
| "grad_norm": 22.0, | |
| "learning_rate": 5.7057808293691305e-06, | |
| "logits/chosen": -1.681640625, | |
| "logits/rejected": -1.68359375, | |
| "logps/chosen": -918.0, | |
| "logps/rejected": -1000.0, | |
| "loss": 0.62646484375, | |
| "nll_loss": 1.724609375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.3359375, | |
| "rewards/margins": 9.421875, | |
| "rewards/rejected": -6.078125, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.7106446776611695, | |
| "grad_norm": 23.875, | |
| "learning_rate": 5.67390026701377e-06, | |
| "logits/chosen": -1.796875, | |
| "logits/rejected": -1.7890625, | |
| "logps/chosen": -1216.0, | |
| "logps/rejected": -1272.0, | |
| "loss": 0.5439453125, | |
| "nll_loss": 1.720703125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.7890625, | |
| "rewards/margins": 11.140625, | |
| "rewards/rejected": -6.34375, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.7136431784107946, | |
| "grad_norm": 38.0, | |
| "learning_rate": 5.641991775732756e-06, | |
| "logits/chosen": -1.734375, | |
| "logits/rejected": -1.685546875, | |
| "logps/chosen": -1150.0, | |
| "logps/rejected": -1274.0, | |
| "loss": 0.64697265625, | |
| "nll_loss": 1.87109375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.52734375, | |
| "rewards/margins": 9.6328125, | |
| "rewards/rejected": -6.1015625, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.7166416791604198, | |
| "grad_norm": 29.0, | |
| "learning_rate": 5.610056677932274e-06, | |
| "logits/chosen": -1.525390625, | |
| "logits/rejected": -1.54296875, | |
| "logps/chosen": -524.6875, | |
| "logps/rejected": -573.625, | |
| "loss": 0.39794921875, | |
| "nll_loss": 0.998779296875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.9140625, | |
| "rewards/margins": 8.484375, | |
| "rewards/rejected": -5.5625, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.719640179910045, | |
| "grad_norm": 30.5, | |
| "learning_rate": 5.5780962971211795e-06, | |
| "logits/chosen": -1.6953125, | |
| "logits/rejected": -1.71484375, | |
| "logps/chosen": -1165.0, | |
| "logps/rejected": -1233.0, | |
| "loss": 0.5751953125, | |
| "nll_loss": 1.763671875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.6953125, | |
| "rewards/margins": 9.40625, | |
| "rewards/rejected": -5.71875, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.7226386806596702, | |
| "grad_norm": 45.5, | |
| "learning_rate": 5.546111957856155e-06, | |
| "logits/chosen": -1.814453125, | |
| "logits/rejected": -1.849609375, | |
| "logps/chosen": -2036.0, | |
| "logps/rejected": -2018.0, | |
| "loss": 0.685546875, | |
| "nll_loss": 2.298828125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.125, | |
| "rewards/margins": 10.25, | |
| "rewards/rejected": -5.125, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.7256371814092953, | |
| "grad_norm": 37.5, | |
| "learning_rate": 5.514104985686802e-06, | |
| "logits/chosen": -1.7421875, | |
| "logits/rejected": -1.703125, | |
| "logps/chosen": -1124.25, | |
| "logps/rejected": -1128.5, | |
| "loss": 0.54736328125, | |
| "nll_loss": 1.5068359375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.34375, | |
| "rewards/margins": 10.578125, | |
| "rewards/rejected": -7.25, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.7286356821589205, | |
| "grad_norm": 26.5, | |
| "learning_rate": 5.482076707100723e-06, | |
| "logits/chosen": -1.69140625, | |
| "logits/rejected": -1.65234375, | |
| "logps/chosen": -1042.1875, | |
| "logps/rejected": -1090.25, | |
| "loss": 0.49267578125, | |
| "nll_loss": 1.499267578125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.7265625, | |
| "rewards/margins": 8.9765625, | |
| "rewards/rejected": -5.25, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.7316341829085458, | |
| "grad_norm": 9.0625, | |
| "learning_rate": 5.4500284494685275e-06, | |
| "logits/chosen": -1.763671875, | |
| "logits/rejected": -1.833984375, | |
| "logps/chosen": -1126.0, | |
| "logps/rejected": -1384.0, | |
| "loss": 0.4873046875, | |
| "nll_loss": 1.6796875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.1875, | |
| "rewards/margins": 13.546875, | |
| "rewards/rejected": -8.375, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.7346326836581709, | |
| "grad_norm": 13.0, | |
| "learning_rate": 5.417961540988837e-06, | |
| "logits/chosen": -1.640625, | |
| "logits/rejected": -1.697265625, | |
| "logps/chosen": -1245.125, | |
| "logps/rejected": -1597.5, | |
| "loss": 0.43212890625, | |
| "nll_loss": 1.4794921875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.4609375, | |
| "rewards/margins": 13.46875, | |
| "rewards/rejected": -8.0, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.7376311844077961, | |
| "grad_norm": 16.5, | |
| "learning_rate": 5.385877310633233e-06, | |
| "logits/chosen": -1.69921875, | |
| "logits/rejected": -1.68359375, | |
| "logps/chosen": -489.5, | |
| "logps/rejected": -786.5, | |
| "loss": 0.345703125, | |
| "nll_loss": 0.82373046875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.4609375, | |
| "rewards/margins": 8.8125, | |
| "rewards/rejected": -5.359375, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.7406296851574213, | |
| "grad_norm": 35.0, | |
| "learning_rate": 5.353777088091177e-06, | |
| "logits/chosen": -1.607421875, | |
| "logits/rejected": -1.59765625, | |
| "logps/chosen": -1627.0, | |
| "logps/rejected": -1680.0, | |
| "loss": 0.611328125, | |
| "nll_loss": 2.056640625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.4296875, | |
| "rewards/margins": 14.265625, | |
| "rewards/rejected": -7.84375, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.7436281859070465, | |
| "grad_norm": 23.0, | |
| "learning_rate": 5.321662203714909e-06, | |
| "logits/chosen": -1.68359375, | |
| "logits/rejected": -1.73046875, | |
| "logps/chosen": -1636.0, | |
| "logps/rejected": -1757.0, | |
| "loss": 0.54150390625, | |
| "nll_loss": 1.890625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.828125, | |
| "rewards/margins": 12.703125, | |
| "rewards/rejected": -6.875, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.7466266866566716, | |
| "grad_norm": 25.625, | |
| "learning_rate": 5.289533988464307e-06, | |
| "logits/chosen": -1.625, | |
| "logits/rejected": -1.5517578125, | |
| "logps/chosen": -1512.875, | |
| "logps/rejected": -1639.625, | |
| "loss": 0.66015625, | |
| "nll_loss": 1.72509765625, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 4.1015625, | |
| "rewards/margins": 10.1484375, | |
| "rewards/rejected": -6.0546875, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.7496251874062968, | |
| "grad_norm": 6.125, | |
| "learning_rate": 5.257393773851733e-06, | |
| "logits/chosen": -1.6796875, | |
| "logits/rejected": -1.658203125, | |
| "logps/chosen": -1528.0, | |
| "logps/rejected": -1630.0, | |
| "loss": 0.494140625, | |
| "nll_loss": 1.8515625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.734375, | |
| "rewards/margins": 13.921875, | |
| "rewards/rejected": -8.1875, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.7526236881559221, | |
| "grad_norm": 60.75, | |
| "learning_rate": 5.2252428918868446e-06, | |
| "logits/chosen": -1.69140625, | |
| "logits/rejected": -1.66796875, | |
| "logps/chosen": -1806.0, | |
| "logps/rejected": -1942.0, | |
| "loss": 0.7265625, | |
| "nll_loss": 2.3203125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.3828125, | |
| "rewards/margins": 9.9609375, | |
| "rewards/rejected": -4.578125, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.7556221889055472, | |
| "grad_norm": 22.25, | |
| "learning_rate": 5.193082675021393e-06, | |
| "logits/chosen": -1.73046875, | |
| "logits/rejected": -1.712890625, | |
| "logps/chosen": -2152.0, | |
| "logps/rejected": -2320.0, | |
| "loss": 0.63037109375, | |
| "nll_loss": 2.302734375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.28125, | |
| "rewards/margins": 11.984375, | |
| "rewards/rejected": -5.703125, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.7586206896551724, | |
| "grad_norm": 62.25, | |
| "learning_rate": 5.160914456094005e-06, | |
| "logits/chosen": -1.736328125, | |
| "logits/rejected": -1.724609375, | |
| "logps/chosen": -615.0, | |
| "logps/rejected": -682.625, | |
| "loss": 0.571533203125, | |
| "nll_loss": 1.18505859375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.45703125, | |
| "rewards/margins": 7.4140625, | |
| "rewards/rejected": -3.96875, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.7616191904047976, | |
| "grad_norm": 46.25, | |
| "learning_rate": 5.1287395682749444e-06, | |
| "logits/chosen": -1.798828125, | |
| "logits/rejected": -1.7734375, | |
| "logps/chosen": -1304.0, | |
| "logps/rejected": -1418.0, | |
| "loss": 0.5908203125, | |
| "nll_loss": 1.853515625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.6015625, | |
| "rewards/margins": 10.40625, | |
| "rewards/rejected": -4.8046875, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.7646176911544228, | |
| "grad_norm": 24.375, | |
| "learning_rate": 5.0965593450108495e-06, | |
| "logits/chosen": -1.7265625, | |
| "logits/rejected": -1.7421875, | |
| "logps/chosen": -1091.5, | |
| "logps/rejected": -1184.0, | |
| "loss": 0.5869140625, | |
| "nll_loss": 2.044921875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.140625, | |
| "rewards/margins": 11.359375, | |
| "rewards/rejected": -6.2265625, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.767616191904048, | |
| "grad_norm": 54.0, | |
| "learning_rate": 5.064375119969491e-06, | |
| "logits/chosen": -1.697265625, | |
| "logits/rejected": -1.69140625, | |
| "logps/chosen": -1405.625, | |
| "logps/rejected": -1509.25, | |
| "loss": 0.5546875, | |
| "nll_loss": 1.7880859375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.73046875, | |
| "rewards/margins": 16.265625, | |
| "rewards/rejected": -12.546875, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.7706146926536732, | |
| "grad_norm": 28.125, | |
| "learning_rate": 5.03218822698448e-06, | |
| "logits/chosen": -1.68359375, | |
| "logits/rejected": -1.697265625, | |
| "logps/chosen": -1122.25, | |
| "logps/rejected": -1205.75, | |
| "loss": 0.5009765625, | |
| "nll_loss": 1.689453125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.6328125, | |
| "rewards/margins": 11.84375, | |
| "rewards/rejected": -7.1953125, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.7736131934032984, | |
| "grad_norm": 8.125, | |
| "learning_rate": 5e-06, | |
| "logits/chosen": -1.62890625, | |
| "logits/rejected": -1.599609375, | |
| "logps/chosen": -1328.0, | |
| "logps/rejected": -1572.0, | |
| "loss": 0.47412109375, | |
| "nll_loss": 1.8125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.3984375, | |
| "rewards/margins": 16.59375, | |
| "rewards/rejected": -10.1875, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.7766116941529235, | |
| "grad_norm": 8.75, | |
| "learning_rate": 4.967811773015521e-06, | |
| "logits/chosen": -1.716796875, | |
| "logits/rejected": -1.75390625, | |
| "logps/chosen": -1272.0, | |
| "logps/rejected": -1446.0, | |
| "loss": 0.5322265625, | |
| "nll_loss": 1.875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.734375, | |
| "rewards/margins": 11.875, | |
| "rewards/rejected": -7.140625, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.7796101949025487, | |
| "grad_norm": 6.3125, | |
| "learning_rate": 4.93562488003051e-06, | |
| "logits/chosen": -1.529296875, | |
| "logits/rejected": -1.607421875, | |
| "logps/chosen": -881.125, | |
| "logps/rejected": -986.875, | |
| "loss": 0.49365234375, | |
| "nll_loss": 1.46142578125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.47265625, | |
| "rewards/margins": 10.2265625, | |
| "rewards/rejected": -5.75390625, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.782608695652174, | |
| "grad_norm": 51.0, | |
| "learning_rate": 4.90344065498915e-06, | |
| "logits/chosen": -1.732421875, | |
| "logits/rejected": -1.71875, | |
| "logps/chosen": -1242.0, | |
| "logps/rejected": -1396.5, | |
| "loss": 0.6455078125, | |
| "nll_loss": 1.60498046875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.3125, | |
| "rewards/margins": 9.578125, | |
| "rewards/rejected": -7.265625, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.7856071964017991, | |
| "grad_norm": 27.375, | |
| "learning_rate": 4.871260431725058e-06, | |
| "logits/chosen": -1.630859375, | |
| "logits/rejected": -1.72265625, | |
| "logps/chosen": -875.0, | |
| "logps/rejected": -1267.0, | |
| "loss": 0.5908203125, | |
| "nll_loss": 1.451171875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.4140625, | |
| "rewards/margins": 10.203125, | |
| "rewards/rejected": -5.7890625, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.7886056971514243, | |
| "grad_norm": 36.5, | |
| "learning_rate": 4.8390855439059955e-06, | |
| "logits/chosen": -1.62890625, | |
| "logits/rejected": -1.705078125, | |
| "logps/chosen": -1325.0, | |
| "logps/rejected": -1452.0, | |
| "loss": 0.548828125, | |
| "nll_loss": 1.77734375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.28125, | |
| "rewards/margins": 10.6875, | |
| "rewards/rejected": -6.40625, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.7916041979010495, | |
| "grad_norm": 50.0, | |
| "learning_rate": 4.806917324978608e-06, | |
| "logits/chosen": -1.697265625, | |
| "logits/rejected": -1.72265625, | |
| "logps/chosen": -1273.5, | |
| "logps/rejected": -1497.0, | |
| "loss": 0.6025390625, | |
| "nll_loss": 1.62890625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.3828125, | |
| "rewards/margins": 9.4609375, | |
| "rewards/rejected": -4.078125, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.7946026986506747, | |
| "grad_norm": 48.75, | |
| "learning_rate": 4.774757108113156e-06, | |
| "logits/chosen": -1.736328125, | |
| "logits/rejected": -1.7421875, | |
| "logps/chosen": -1547.5, | |
| "logps/rejected": -1678.5, | |
| "loss": 0.58349609375, | |
| "nll_loss": 1.935546875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.75390625, | |
| "rewards/margins": 12.28125, | |
| "rewards/rejected": -7.53125, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.7976011994002998, | |
| "grad_norm": 31.375, | |
| "learning_rate": 4.742606226148268e-06, | |
| "logits/chosen": -1.78125, | |
| "logits/rejected": -1.751953125, | |
| "logps/chosen": -777.0, | |
| "logps/rejected": -767.0, | |
| "loss": 0.6220703125, | |
| "nll_loss": 1.3740234375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.546875, | |
| "rewards/margins": 8.515625, | |
| "rewards/rejected": -5.9609375, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.800599700149925, | |
| "grad_norm": 15.1875, | |
| "learning_rate": 4.710466011535695e-06, | |
| "logits/chosen": -1.62890625, | |
| "logits/rejected": -1.73828125, | |
| "logps/chosen": -1782.0, | |
| "logps/rejected": -1670.0, | |
| "loss": 0.56640625, | |
| "nll_loss": 2.0615234375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.65625, | |
| "rewards/margins": 12.34375, | |
| "rewards/rejected": -7.6875, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.8035982008995503, | |
| "grad_norm": 18.0, | |
| "learning_rate": 4.678337796285093e-06, | |
| "logits/chosen": -1.728515625, | |
| "logits/rejected": -1.76953125, | |
| "logps/chosen": -1278.875, | |
| "logps/rejected": -1236.125, | |
| "loss": 0.59326171875, | |
| "nll_loss": 1.6357421875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.1875, | |
| "rewards/margins": 9.390625, | |
| "rewards/rejected": -6.1875, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.8065967016491754, | |
| "grad_norm": 35.5, | |
| "learning_rate": 4.6462229119088234e-06, | |
| "logits/chosen": -1.85546875, | |
| "logits/rejected": -1.806640625, | |
| "logps/chosen": -1858.0, | |
| "logps/rejected": -1754.0, | |
| "loss": 0.703125, | |
| "nll_loss": 2.439453125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.3984375, | |
| "rewards/margins": 12.96875, | |
| "rewards/rejected": -7.578125, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.8095952023988006, | |
| "grad_norm": 36.75, | |
| "learning_rate": 4.614122689366769e-06, | |
| "logits/chosen": -1.783203125, | |
| "logits/rejected": -1.78515625, | |
| "logps/chosen": -1732.0, | |
| "logps/rejected": -1936.0, | |
| "loss": 0.6005859375, | |
| "nll_loss": 2.103515625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.5234375, | |
| "rewards/margins": 12.0625, | |
| "rewards/rejected": -5.53125, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.8125937031484258, | |
| "grad_norm": 48.0, | |
| "learning_rate": 4.582038459011165e-06, | |
| "logits/chosen": -1.65625, | |
| "logits/rejected": -1.6484375, | |
| "logps/chosen": -1510.0, | |
| "logps/rejected": -1506.0, | |
| "loss": 0.53466796875, | |
| "nll_loss": 1.7109375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.6015625, | |
| "rewards/margins": 14.71875, | |
| "rewards/rejected": -9.125, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.815592203898051, | |
| "grad_norm": 22.25, | |
| "learning_rate": 4.549971550531474e-06, | |
| "logits/chosen": -1.70703125, | |
| "logits/rejected": -1.73046875, | |
| "logps/chosen": -760.1875, | |
| "logps/rejected": -847.0625, | |
| "loss": 0.55322265625, | |
| "nll_loss": 1.33154296875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.66796875, | |
| "rewards/margins": 8.203125, | |
| "rewards/rejected": -4.53125, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.8185907046476761, | |
| "grad_norm": 37.0, | |
| "learning_rate": 4.51792329289928e-06, | |
| "logits/chosen": -1.712890625, | |
| "logits/rejected": -1.658203125, | |
| "logps/chosen": -1470.5, | |
| "logps/rejected": -1666.0, | |
| "loss": 0.5849609375, | |
| "nll_loss": 1.81640625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.328125, | |
| "rewards/margins": 11.5, | |
| "rewards/rejected": -6.171875, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.8215892053973014, | |
| "grad_norm": 26.125, | |
| "learning_rate": 4.485895014313198e-06, | |
| "logits/chosen": -1.765625, | |
| "logits/rejected": -1.6953125, | |
| "logps/chosen": -1831.0, | |
| "logps/rejected": -1974.0, | |
| "loss": 0.572265625, | |
| "nll_loss": 2.09375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.578125, | |
| "rewards/margins": 14.0, | |
| "rewards/rejected": -8.421875, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.8245877061469266, | |
| "grad_norm": 26.25, | |
| "learning_rate": 4.453888042143847e-06, | |
| "logits/chosen": -1.6953125, | |
| "logits/rejected": -1.80859375, | |
| "logps/chosen": -1425.0, | |
| "logps/rejected": -1620.0, | |
| "loss": 0.7119140625, | |
| "nll_loss": 2.078125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.9140625, | |
| "rewards/margins": 9.0078125, | |
| "rewards/rejected": -4.0859375, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.8275862068965517, | |
| "grad_norm": 59.75, | |
| "learning_rate": 4.421903702878822e-06, | |
| "logits/chosen": -1.755859375, | |
| "logits/rejected": -1.806640625, | |
| "logps/chosen": -1453.0, | |
| "logps/rejected": -1662.0, | |
| "loss": 0.583984375, | |
| "nll_loss": 1.93359375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 8.359375, | |
| "rewards/margins": 14.78125, | |
| "rewards/rejected": -6.421875, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.8305847076461769, | |
| "grad_norm": 15.1875, | |
| "learning_rate": 4.389943322067728e-06, | |
| "logits/chosen": -1.59375, | |
| "logits/rejected": -1.6015625, | |
| "logps/chosen": -503.75, | |
| "logps/rejected": -765.5, | |
| "loss": 0.2955322265625, | |
| "nll_loss": 0.731689453125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.2421875, | |
| "rewards/margins": 12.578125, | |
| "rewards/rejected": -9.3359375, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.8335832083958021, | |
| "grad_norm": 35.75, | |
| "learning_rate": 4.358008224267245e-06, | |
| "logits/chosen": -1.595703125, | |
| "logits/rejected": -1.646484375, | |
| "logps/chosen": -1723.0, | |
| "logps/rejected": -1879.0, | |
| "loss": 0.5888671875, | |
| "nll_loss": 2.0234375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.890625, | |
| "rewards/margins": 12.3125, | |
| "rewards/rejected": -6.421875, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.8365817091454273, | |
| "grad_norm": 9.5625, | |
| "learning_rate": 4.326099732986231e-06, | |
| "logits/chosen": -1.73046875, | |
| "logits/rejected": -1.72265625, | |
| "logps/chosen": -851.734375, | |
| "logps/rejected": -817.25, | |
| "loss": 0.5048828125, | |
| "nll_loss": 1.164306640625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.314453125, | |
| "rewards/margins": 8.859375, | |
| "rewards/rejected": -4.5546875, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.8395802098950524, | |
| "grad_norm": 18.625, | |
| "learning_rate": 4.29421917063087e-06, | |
| "logits/chosen": -1.830078125, | |
| "logits/rejected": -1.822265625, | |
| "logps/chosen": -1700.0, | |
| "logps/rejected": -1900.0, | |
| "loss": 0.705078125, | |
| "nll_loss": 2.12890625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.640625, | |
| "rewards/margins": 11.4140625, | |
| "rewards/rejected": -6.7578125, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.8425787106446777, | |
| "grad_norm": 61.25, | |
| "learning_rate": 4.262367858449867e-06, | |
| "logits/chosen": -1.62890625, | |
| "logits/rejected": -1.65625, | |
| "logps/chosen": -1511.0, | |
| "logps/rejected": -1962.0, | |
| "loss": 0.7255859375, | |
| "nll_loss": 2.3125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.0703125, | |
| "rewards/margins": 11.625, | |
| "rewards/rejected": -5.5625, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.8455772113943029, | |
| "grad_norm": 24.5, | |
| "learning_rate": 4.230547116479691e-06, | |
| "logits/chosen": -1.748046875, | |
| "logits/rejected": -1.76953125, | |
| "logps/chosen": -1011.6875, | |
| "logps/rejected": -1073.25, | |
| "loss": 0.432861328125, | |
| "nll_loss": 1.27880859375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.3984375, | |
| "rewards/margins": 10.96875, | |
| "rewards/rejected": -5.5859375, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.848575712143928, | |
| "grad_norm": 42.25, | |
| "learning_rate": 4.1987582634898724e-06, | |
| "logits/chosen": -1.681640625, | |
| "logits/rejected": -1.62890625, | |
| "logps/chosen": -1221.75, | |
| "logps/rejected": -1297.0, | |
| "loss": 0.474365234375, | |
| "nll_loss": 1.327392578125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.66015625, | |
| "rewards/margins": 13.53125, | |
| "rewards/rejected": -8.859375, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.8515742128935532, | |
| "grad_norm": 55.5, | |
| "learning_rate": 4.167002616928341e-06, | |
| "logits/chosen": -1.671875, | |
| "logits/rejected": -1.662109375, | |
| "logps/chosen": -831.75, | |
| "logps/rejected": -880.0, | |
| "loss": 0.5908203125, | |
| "nll_loss": 1.43115234375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.638671875, | |
| "rewards/margins": 8.984375, | |
| "rewards/rejected": -6.359375, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.8545727136431784, | |
| "grad_norm": 7.25, | |
| "learning_rate": 4.135281492866826e-06, | |
| "logits/chosen": -1.73046875, | |
| "logits/rejected": -1.755859375, | |
| "logps/chosen": -1302.0, | |
| "logps/rejected": -1766.0, | |
| "loss": 0.55322265625, | |
| "nll_loss": 2.083984375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.90625, | |
| "rewards/margins": 14.296875, | |
| "rewards/rejected": -8.40625, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.8575712143928036, | |
| "grad_norm": 48.25, | |
| "learning_rate": 4.103596205946323e-06, | |
| "logits/chosen": -1.623046875, | |
| "logits/rejected": -1.681640625, | |
| "logps/chosen": -1045.6875, | |
| "logps/rejected": -1236.75, | |
| "loss": 0.625, | |
| "nll_loss": 1.55859375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.359375, | |
| "rewards/margins": 9.03125, | |
| "rewards/rejected": -5.671875, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.8605697151424287, | |
| "grad_norm": 32.0, | |
| "learning_rate": 4.0719480693225964e-06, | |
| "logits/chosen": -1.6328125, | |
| "logits/rejected": -1.71484375, | |
| "logps/chosen": -1069.75, | |
| "logps/rejected": -976.25, | |
| "loss": 0.6630859375, | |
| "nll_loss": 1.48388671875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.046875, | |
| "rewards/margins": 8.68359375, | |
| "rewards/rejected": -3.63671875, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.863568215892054, | |
| "grad_norm": 13.8125, | |
| "learning_rate": 4.040338394611772e-06, | |
| "logits/chosen": -1.748046875, | |
| "logits/rejected": -1.71875, | |
| "logps/chosen": -1143.25, | |
| "logps/rejected": -1241.0, | |
| "loss": 0.45947265625, | |
| "nll_loss": 1.4365234375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.59375, | |
| "rewards/margins": 11.484375, | |
| "rewards/rejected": -7.90625, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.8665667166416792, | |
| "grad_norm": 17.375, | |
| "learning_rate": 4.0087684918359646e-06, | |
| "logits/chosen": -1.64453125, | |
| "logits/rejected": -1.73046875, | |
| "logps/chosen": -1401.0, | |
| "logps/rejected": -1714.0, | |
| "loss": 0.50048828125, | |
| "nll_loss": 1.7919921875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.515625, | |
| "rewards/margins": 14.984375, | |
| "rewards/rejected": -8.46875, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.8695652173913043, | |
| "grad_norm": 11.3125, | |
| "learning_rate": 3.977239669368998e-06, | |
| "logits/chosen": -1.810546875, | |
| "logits/rejected": -1.708984375, | |
| "logps/chosen": -1070.5, | |
| "logps/rejected": -1133.0, | |
| "loss": 0.50244140625, | |
| "nll_loss": 1.74560546875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.09375, | |
| "rewards/margins": 15.515625, | |
| "rewards/rejected": -10.390625, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.8725637181409296, | |
| "grad_norm": 40.5, | |
| "learning_rate": 3.945753233882168e-06, | |
| "logits/chosen": -1.66015625, | |
| "logits/rejected": -1.689453125, | |
| "logps/chosen": -962.25, | |
| "logps/rejected": -1140.25, | |
| "loss": 0.4775390625, | |
| "nll_loss": 1.3759765625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.951171875, | |
| "rewards/margins": 10.0390625, | |
| "rewards/rejected": -6.0859375, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.8755622188905547, | |
| "grad_norm": 40.25, | |
| "learning_rate": 3.9143104902901085e-06, | |
| "logits/chosen": -1.69921875, | |
| "logits/rejected": -1.69140625, | |
| "logps/chosen": -1700.0, | |
| "logps/rejected": -1820.0, | |
| "loss": 0.7255859375, | |
| "nll_loss": 2.208984375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.6328125, | |
| "rewards/margins": 11.265625, | |
| "rewards/rejected": -5.640625, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.8785607196401799, | |
| "grad_norm": 27.25, | |
| "learning_rate": 3.882912741696688e-06, | |
| "logits/chosen": -1.703125, | |
| "logits/rejected": -1.6953125, | |
| "logps/chosen": -1748.0, | |
| "logps/rejected": -1916.0, | |
| "loss": 0.673828125, | |
| "nll_loss": 2.037109375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.1875, | |
| "rewards/margins": 12.515625, | |
| "rewards/rejected": -7.328125, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.881559220389805, | |
| "grad_norm": 11.1875, | |
| "learning_rate": 3.851561289341023e-06, | |
| "logits/chosen": -1.73828125, | |
| "logits/rejected": -1.74609375, | |
| "logps/chosen": -1832.0, | |
| "logps/rejected": -1968.0, | |
| "loss": 0.7021484375, | |
| "nll_loss": 2.150390625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.390625, | |
| "rewards/margins": 13.078125, | |
| "rewards/rejected": -6.6875, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.8845577211394303, | |
| "grad_norm": 52.5, | |
| "learning_rate": 3.820257432543539e-06, | |
| "logits/chosen": -1.744140625, | |
| "logits/rejected": -1.71484375, | |
| "logps/chosen": -999.5, | |
| "logps/rejected": -1240.25, | |
| "loss": 0.666015625, | |
| "nll_loss": 1.64990234375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.6015625, | |
| "rewards/margins": 11.1015625, | |
| "rewards/rejected": -5.484375, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.8875562218890555, | |
| "grad_norm": 15.125, | |
| "learning_rate": 3.789002468652121e-06, | |
| "logits/chosen": -1.662109375, | |
| "logits/rejected": -1.669921875, | |
| "logps/chosen": -1266.5, | |
| "logps/rejected": -1460.0, | |
| "loss": 0.5751953125, | |
| "nll_loss": 1.904296875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.0859375, | |
| "rewards/margins": 11.984375, | |
| "rewards/rejected": -6.8984375, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.8905547226386806, | |
| "grad_norm": 14.375, | |
| "learning_rate": 3.7577976929883608e-06, | |
| "logits/chosen": -1.640625, | |
| "logits/rejected": -1.5625, | |
| "logps/chosen": -1092.25, | |
| "logps/rejected": -1052.5, | |
| "loss": 0.61572265625, | |
| "nll_loss": 1.72509765625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.234375, | |
| "rewards/margins": 11.65625, | |
| "rewards/rejected": -7.421875, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.8935532233883059, | |
| "grad_norm": 45.75, | |
| "learning_rate": 3.726644398793857e-06, | |
| "logits/chosen": -1.822265625, | |
| "logits/rejected": -1.822265625, | |
| "logps/chosen": -1571.0, | |
| "logps/rejected": -1712.0, | |
| "loss": 0.7763671875, | |
| "nll_loss": 1.75, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.98828125, | |
| "rewards/margins": 8.859375, | |
| "rewards/rejected": -5.8828125, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.896551724137931, | |
| "grad_norm": 8.375, | |
| "learning_rate": 3.695543877176626e-06, | |
| "logits/chosen": -1.75, | |
| "logits/rejected": -1.734375, | |
| "logps/chosen": -1709.0, | |
| "logps/rejected": -1790.0, | |
| "loss": 0.5283203125, | |
| "nll_loss": 1.96484375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.9921875, | |
| "rewards/margins": 12.765625, | |
| "rewards/rejected": -6.7734375, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.8995502248875562, | |
| "grad_norm": 11.8125, | |
| "learning_rate": 3.6644974170575907e-06, | |
| "logits/chosen": -1.69921875, | |
| "logits/rejected": -1.71484375, | |
| "logps/chosen": -919.75, | |
| "logps/rejected": -874.5, | |
| "loss": 0.53564453125, | |
| "nll_loss": 1.59619140625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.76953125, | |
| "rewards/margins": 9.625, | |
| "rewards/rejected": -5.8671875, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.9025487256371814, | |
| "grad_norm": 16.75, | |
| "learning_rate": 3.6335063051171725e-06, | |
| "logits/chosen": -1.83203125, | |
| "logits/rejected": -1.814453125, | |
| "logps/chosen": -1432.0, | |
| "logps/rejected": -1301.0, | |
| "loss": 0.63037109375, | |
| "nll_loss": 1.95703125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.0625, | |
| "rewards/margins": 10.671875, | |
| "rewards/rejected": -6.609375, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.9055472263868066, | |
| "grad_norm": 25.5, | |
| "learning_rate": 3.6025718257419532e-06, | |
| "logits/chosen": -1.662109375, | |
| "logits/rejected": -1.68359375, | |
| "logps/chosen": -844.375, | |
| "logps/rejected": -1091.25, | |
| "loss": 0.654296875, | |
| "nll_loss": 1.51171875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.984375, | |
| "rewards/margins": 8.92578125, | |
| "rewards/rejected": -4.94140625, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.9085457271364318, | |
| "grad_norm": 35.5, | |
| "learning_rate": 3.5716952609714517e-06, | |
| "logits/chosen": -1.755859375, | |
| "logits/rejected": -1.716796875, | |
| "logps/chosen": -1732.0, | |
| "logps/rejected": -1798.0, | |
| "loss": 0.5380859375, | |
| "nll_loss": 1.880859375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.8125, | |
| "rewards/margins": 13.328125, | |
| "rewards/rejected": -7.515625, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.9115442278860569, | |
| "grad_norm": 15.0, | |
| "learning_rate": 3.540877890444989e-06, | |
| "logits/chosen": -1.646484375, | |
| "logits/rejected": -1.671875, | |
| "logps/chosen": -941.125, | |
| "logps/rejected": -1159.0, | |
| "loss": 0.361328125, | |
| "nll_loss": 1.1339111328125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.390625, | |
| "rewards/margins": 16.875, | |
| "rewards/rejected": -12.46875, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.9145427286356822, | |
| "grad_norm": 25.0, | |
| "learning_rate": 3.5101209913486655e-06, | |
| "logits/chosen": -1.75390625, | |
| "logits/rejected": -1.73046875, | |
| "logps/chosen": -1143.359375, | |
| "logps/rejected": -1245.375, | |
| "loss": 0.48388671875, | |
| "nll_loss": 1.440185546875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.66796875, | |
| "rewards/margins": 8.578125, | |
| "rewards/rejected": -4.9140625, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.9175412293853074, | |
| "grad_norm": 28.875, | |
| "learning_rate": 3.4794258383624115e-06, | |
| "logits/chosen": -1.734375, | |
| "logits/rejected": -1.767578125, | |
| "logps/chosen": -1412.75, | |
| "logps/rejected": -1507.0, | |
| "loss": 0.5478515625, | |
| "nll_loss": 1.734375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.6796875, | |
| "rewards/margins": 9.5, | |
| "rewards/rejected": -4.8203125, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.9205397301349325, | |
| "grad_norm": 18.25, | |
| "learning_rate": 3.448793703607175e-06, | |
| "logits/chosen": -1.669921875, | |
| "logits/rejected": -1.6953125, | |
| "logps/chosen": -1309.0, | |
| "logps/rejected": -1461.0, | |
| "loss": 0.537109375, | |
| "nll_loss": 1.5556640625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.375, | |
| "rewards/margins": 11.2578125, | |
| "rewards/rejected": -6.8828125, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.9235382308845578, | |
| "grad_norm": 25.75, | |
| "learning_rate": 3.4182258565921933e-06, | |
| "logits/chosen": -1.78515625, | |
| "logits/rejected": -1.76171875, | |
| "logps/chosen": -1364.0, | |
| "logps/rejected": -1426.0, | |
| "loss": 0.55078125, | |
| "nll_loss": 1.841796875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.20703125, | |
| "rewards/margins": 10.15625, | |
| "rewards/rejected": -5.9609375, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.9265367316341829, | |
| "grad_norm": 9.9375, | |
| "learning_rate": 3.3877235641623797e-06, | |
| "logits/chosen": -1.642578125, | |
| "logits/rejected": -1.66015625, | |
| "logps/chosen": -496.75, | |
| "logps/rejected": -698.5, | |
| "loss": 0.4853515625, | |
| "nll_loss": 0.99365234375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.99609375, | |
| "rewards/margins": 8.07421875, | |
| "rewards/rejected": -5.078125, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.9295352323838081, | |
| "grad_norm": 7.28125, | |
| "learning_rate": 3.3572880904458267e-06, | |
| "logits/chosen": -1.607421875, | |
| "logits/rejected": -1.685546875, | |
| "logps/chosen": -1492.25, | |
| "logps/rejected": -1327.75, | |
| "loss": 0.57421875, | |
| "nll_loss": 2.006103515625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.54296875, | |
| "rewards/margins": 13.5859375, | |
| "rewards/rejected": -9.03125, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.9325337331334332, | |
| "grad_norm": 31.75, | |
| "learning_rate": 3.32692069680141e-06, | |
| "logits/chosen": -1.744140625, | |
| "logits/rejected": -1.826171875, | |
| "logps/chosen": -1816.5, | |
| "logps/rejected": -1791.0, | |
| "loss": 0.63330078125, | |
| "nll_loss": 2.265625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.15625, | |
| "rewards/margins": 13.453125, | |
| "rewards/rejected": -7.296875, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.9355322338830585, | |
| "grad_norm": 61.25, | |
| "learning_rate": 3.2966226417665125e-06, | |
| "logits/chosen": -1.744140625, | |
| "logits/rejected": -1.759765625, | |
| "logps/chosen": -1076.0, | |
| "logps/rejected": -1744.0, | |
| "loss": 0.568359375, | |
| "nll_loss": 1.85546875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.1875, | |
| "rewards/margins": 13.625, | |
| "rewards/rejected": -7.4296875, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.9385307346326837, | |
| "grad_norm": 11.875, | |
| "learning_rate": 3.2663951810048683e-06, | |
| "logits/chosen": -1.703125, | |
| "logits/rejected": -1.75, | |
| "logps/chosen": -1554.0, | |
| "logps/rejected": -1578.0, | |
| "loss": 0.5556640625, | |
| "nll_loss": 2.0390625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.4375, | |
| "rewards/margins": 12.328125, | |
| "rewards/rejected": -6.890625, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.9415292353823088, | |
| "grad_norm": 6.75, | |
| "learning_rate": 3.236239567254526e-06, | |
| "logits/chosen": -1.6796875, | |
| "logits/rejected": -1.6875, | |
| "logps/chosen": -1006.5, | |
| "logps/rejected": -1101.5, | |
| "loss": 0.395263671875, | |
| "nll_loss": 1.31591796875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.1171875, | |
| "rewards/margins": 14.734375, | |
| "rewards/rejected": -9.609375, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.9445277361319341, | |
| "grad_norm": 9.25, | |
| "learning_rate": 3.206157050275927e-06, | |
| "logits/chosen": -1.791015625, | |
| "logits/rejected": -1.71875, | |
| "logps/chosen": -1070.0, | |
| "logps/rejected": -1108.0, | |
| "loss": 0.4912109375, | |
| "nll_loss": 1.67578125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.4296875, | |
| "rewards/margins": 11.15625, | |
| "rewards/rejected": -6.71875, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.9475262368815592, | |
| "grad_norm": 15.1875, | |
| "learning_rate": 3.176148876800109e-06, | |
| "logits/chosen": -1.8203125, | |
| "logits/rejected": -1.76171875, | |
| "logps/chosen": -1850.0, | |
| "logps/rejected": -1926.0, | |
| "loss": 0.560546875, | |
| "nll_loss": 2.033203125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.4765625, | |
| "rewards/margins": 12.4375, | |
| "rewards/rejected": -6.953125, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.9505247376311844, | |
| "grad_norm": 27.25, | |
| "learning_rate": 3.1462162904770376e-06, | |
| "logits/chosen": -1.650390625, | |
| "logits/rejected": -1.662109375, | |
| "logps/chosen": -1740.5, | |
| "logps/rejected": -1696.25, | |
| "loss": 0.5810546875, | |
| "nll_loss": 1.8818359375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.421875, | |
| "rewards/margins": 10.3828125, | |
| "rewards/rejected": -5.9609375, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.9535232383808095, | |
| "grad_norm": 6.84375, | |
| "learning_rate": 3.116360531824074e-06, | |
| "logits/chosen": -1.505859375, | |
| "logits/rejected": -1.517578125, | |
| "logps/chosen": -302.40625, | |
| "logps/rejected": -462.75, | |
| "loss": 0.2587890625, | |
| "nll_loss": 0.62255859375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.3359375, | |
| "rewards/margins": 13.15625, | |
| "rewards/rejected": -9.8359375, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.9565217391304348, | |
| "grad_norm": 7.46875, | |
| "learning_rate": 3.0865828381745515e-06, | |
| "logits/chosen": -1.712890625, | |
| "logits/rejected": -1.7265625, | |
| "logps/chosen": -1206.0, | |
| "logps/rejected": -1292.0, | |
| "loss": 0.5078125, | |
| "nll_loss": 1.751953125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.6171875, | |
| "rewards/margins": 12.65625, | |
| "rewards/rejected": -7.03125, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.95952023988006, | |
| "grad_norm": 23.625, | |
| "learning_rate": 3.056884443626499e-06, | |
| "logits/chosen": -1.765625, | |
| "logits/rejected": -1.76953125, | |
| "logps/chosen": -1606.0, | |
| "logps/rejected": -1787.0, | |
| "loss": 0.515625, | |
| "nll_loss": 1.6953125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.546875, | |
| "rewards/margins": 10.8515625, | |
| "rewards/rejected": -6.3046875, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.9625187406296851, | |
| "grad_norm": 15.4375, | |
| "learning_rate": 3.027266578991497e-06, | |
| "logits/chosen": -1.71875, | |
| "logits/rejected": -1.732421875, | |
| "logps/chosen": -1544.5, | |
| "logps/rejected": -1706.5, | |
| "loss": 0.4716796875, | |
| "nll_loss": 1.61083984375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.8671875, | |
| "rewards/margins": 11.8125, | |
| "rewards/rejected": -6.9453125, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.9655172413793104, | |
| "grad_norm": 18.625, | |
| "learning_rate": 2.997730471743667e-06, | |
| "logits/chosen": -1.6328125, | |
| "logits/rejected": -1.650390625, | |
| "logps/chosen": -1250.75, | |
| "logps/rejected": -1418.0, | |
| "loss": 0.44970703125, | |
| "nll_loss": 1.46240234375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.78515625, | |
| "rewards/margins": 13.453125, | |
| "rewards/rejected": -8.6796875, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.9685157421289355, | |
| "grad_norm": 17.625, | |
| "learning_rate": 2.9682773459688087e-06, | |
| "logits/chosen": -1.638671875, | |
| "logits/rejected": -1.642578125, | |
| "logps/chosen": -1154.0, | |
| "logps/rejected": -1335.0, | |
| "loss": 0.51171875, | |
| "nll_loss": 1.658203125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.484375, | |
| "rewards/margins": 11.859375, | |
| "rewards/rejected": -7.390625, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.9715142428785607, | |
| "grad_norm": 10.6875, | |
| "learning_rate": 2.9389084223136523e-06, | |
| "logits/chosen": -1.6875, | |
| "logits/rejected": -1.705078125, | |
| "logps/chosen": -1032.5, | |
| "logps/rejected": -1095.0, | |
| "loss": 0.4462890625, | |
| "nll_loss": 1.50390625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.765625, | |
| "rewards/margins": 14.046875, | |
| "rewards/rejected": -9.28125, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.974512743628186, | |
| "grad_norm": 12.3125, | |
| "learning_rate": 2.9096249179352833e-06, | |
| "logits/chosen": -1.49609375, | |
| "logits/rejected": -1.5703125, | |
| "logps/chosen": -1409.5, | |
| "logps/rejected": -1573.0, | |
| "loss": 0.5673828125, | |
| "nll_loss": 2.056640625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.6328125, | |
| "rewards/margins": 14.546875, | |
| "rewards/rejected": -8.9296875, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.9775112443778111, | |
| "grad_norm": 10.8125, | |
| "learning_rate": 2.880428046450697e-06, | |
| "logits/chosen": -1.591796875, | |
| "logits/rejected": -1.658203125, | |
| "logps/chosen": -1150.0, | |
| "logps/rejected": -1345.0, | |
| "loss": 0.45703125, | |
| "nll_loss": 1.662109375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.7734375, | |
| "rewards/margins": 13.71875, | |
| "rewards/rejected": -7.9375, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.9805097451274363, | |
| "grad_norm": 12.4375, | |
| "learning_rate": 2.8513190178865004e-06, | |
| "logits/chosen": -1.646484375, | |
| "logits/rejected": -1.609375, | |
| "logps/chosen": -724.75, | |
| "logps/rejected": -562.5, | |
| "loss": 0.37158203125, | |
| "nll_loss": 1.12841796875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.72265625, | |
| "rewards/margins": 10.484375, | |
| "rewards/rejected": -6.7734375, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.9835082458770614, | |
| "grad_norm": 21.125, | |
| "learning_rate": 2.822299038628762e-06, | |
| "logits/chosen": -1.615234375, | |
| "logits/rejected": -1.689453125, | |
| "logps/chosen": -1058.0, | |
| "logps/rejected": -1222.25, | |
| "loss": 0.490966796875, | |
| "nll_loss": 1.5361328125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.78515625, | |
| "rewards/margins": 10.96875, | |
| "rewards/rejected": -6.1796875, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.9865067466266867, | |
| "grad_norm": 37.25, | |
| "learning_rate": 2.793369311373021e-06, | |
| "logits/chosen": -1.708984375, | |
| "logits/rejected": -1.705078125, | |
| "logps/chosen": -1668.0, | |
| "logps/rejected": -1784.0, | |
| "loss": 0.59375, | |
| "nll_loss": 1.796875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.3125, | |
| "rewards/margins": 12.0, | |
| "rewards/rejected": -6.6875, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.9895052473763118, | |
| "grad_norm": 58.75, | |
| "learning_rate": 2.7645310350744296e-06, | |
| "logits/chosen": -1.818359375, | |
| "logits/rejected": -1.806640625, | |
| "logps/chosen": -1686.0, | |
| "logps/rejected": -1610.0, | |
| "loss": 0.609375, | |
| "nll_loss": 1.896484375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.359375, | |
| "rewards/margins": 10.609375, | |
| "rewards/rejected": -7.25, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.992503748125937, | |
| "grad_norm": 7.59375, | |
| "learning_rate": 2.7357854048980893e-06, | |
| "logits/chosen": -1.7265625, | |
| "logits/rejected": -1.73046875, | |
| "logps/chosen": -1509.0, | |
| "logps/rejected": -1515.0, | |
| "loss": 0.568359375, | |
| "nll_loss": 2.03125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.671875, | |
| "rewards/margins": 12.734375, | |
| "rewards/rejected": -7.0625, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.9955022488755623, | |
| "grad_norm": 10.125, | |
| "learning_rate": 2.7071336121694856e-06, | |
| "logits/chosen": -1.638671875, | |
| "logits/rejected": -1.654296875, | |
| "logps/chosen": -1256.5, | |
| "logps/rejected": -1523.0, | |
| "loss": 0.47998046875, | |
| "nll_loss": 1.6484375, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.1484375, | |
| "rewards/margins": 12.953125, | |
| "rewards/rejected": -6.8125, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.9985007496251874, | |
| "grad_norm": 23.25, | |
| "learning_rate": 2.6785768443251437e-06, | |
| "logits/chosen": -1.70703125, | |
| "logits/rejected": -1.705078125, | |
| "logps/chosen": -1104.875, | |
| "logps/rejected": -1212.5, | |
| "loss": 0.5185546875, | |
| "nll_loss": 1.36279296875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 4.06640625, | |
| "rewards/margins": 11.3046875, | |
| "rewards/rejected": -7.234375, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 44.5, | |
| "learning_rate": 2.6501162848634023e-06, | |
| "logits/chosen": -1.80859375, | |
| "logits/rejected": -1.859375, | |
| "logps/chosen": -2200.0, | |
| "logps/rejected": -2188.0, | |
| "loss": 0.654296875, | |
| "nll_loss": 2.28125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.25, | |
| "rewards/margins": 12.75, | |
| "rewards/rejected": -7.5, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 1.0029985007496252, | |
| "grad_norm": 4.40625, | |
| "learning_rate": 2.621753113295361e-06, | |
| "logits/chosen": -1.673828125, | |
| "logits/rejected": -1.642578125, | |
| "logps/chosen": -1009.0, | |
| "logps/rejected": -1076.0, | |
| "loss": 0.47216796875, | |
| "nll_loss": 1.6875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.3359375, | |
| "rewards/margins": 12.15625, | |
| "rewards/rejected": -6.8203125, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 1.0059970014992503, | |
| "grad_norm": 3.359375, | |
| "learning_rate": 2.5934885050960183e-06, | |
| "logits/chosen": -1.732421875, | |
| "logits/rejected": -1.705078125, | |
| "logps/chosen": -1638.0, | |
| "logps/rejected": -1790.0, | |
| "loss": 0.6123046875, | |
| "nll_loss": 2.26171875, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 6.890625, | |
| "rewards/margins": 14.515625, | |
| "rewards/rejected": -7.640625, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 1.0089955022488755, | |
| "grad_norm": 3.09375, | |
| "learning_rate": 2.565323631655532e-06, | |
| "logits/chosen": -1.77734375, | |
| "logits/rejected": -1.732421875, | |
| "logps/chosen": -1436.0, | |
| "logps/rejected": -1691.0, | |
| "loss": 0.513671875, | |
| "nll_loss": 1.8828125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 8.375, | |
| "rewards/margins": 15.96875, | |
| "rewards/rejected": -7.578125, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 1.0119940029985008, | |
| "grad_norm": 5.0625, | |
| "learning_rate": 2.537259660230679e-06, | |
| "logits/chosen": -1.634765625, | |
| "logits/rejected": -1.64453125, | |
| "logps/chosen": -803.0, | |
| "logps/rejected": -966.0, | |
| "loss": 0.39501953125, | |
| "nll_loss": 1.34765625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 5.171875, | |
| "rewards/margins": 13.359375, | |
| "rewards/rejected": -8.1875, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 1.014992503748126, | |
| "grad_norm": 8.1875, | |
| "learning_rate": 2.5092977538964887e-06, | |
| "logits/chosen": -1.744140625, | |
| "logits/rejected": -1.72265625, | |
| "logps/chosen": -1679.0, | |
| "logps/rejected": -1773.0, | |
| "loss": 0.5478515625, | |
| "nll_loss": 2.095703125, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 7.6953125, | |
| "rewards/margins": 18.03125, | |
| "rewards/rejected": -10.34375, | |
| "step": 339 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 501, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |