Instructions to use bboeun/dpo2-Delayed2-ref-sft1-fix with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- PEFT
How to use bboeun/dpo2-Delayed2-ref-sft1-fix with PEFT:
```python
from peft import PeftModel
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
model = PeftModel.from_pretrained(base_model, "bboeun/dpo2-Delayed2-ref-sft1-fix")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.8888888888888888, | |
| "eval_steps": 500, | |
| "global_step": 2000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 5.309734513274336e-07, | |
| "logits/chosen": -2.1858465671539307, | |
| "logits/rejected": -2.2539868354797363, | |
| "logps/chosen": -292.47344970703125, | |
| "logps/rejected": -334.2834777832031, | |
| "loss": 2.328, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -17.95108985900879, | |
| "rewards/margins": 1.5200703144073486, | |
| "rewards/rejected": -19.47115707397461, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 1.415929203539823e-06, | |
| "logits/chosen": -2.250004529953003, | |
| "logits/rejected": -2.2245919704437256, | |
| "logps/chosen": -323.00567626953125, | |
| "logps/rejected": -341.8704528808594, | |
| "loss": 3.0458, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -18.9575138092041, | |
| "rewards/margins": 0.811493992805481, | |
| "rewards/rejected": -19.76900863647461, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 2.3008849557522127e-06, | |
| "logits/chosen": -2.2509493827819824, | |
| "logits/rejected": -2.2362070083618164, | |
| "logps/chosen": -309.36627197265625, | |
| "logps/rejected": -354.1287841796875, | |
| "loss": 2.001, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -19.02206039428711, | |
| "rewards/margins": 2.324467182159424, | |
| "rewards/rejected": -21.346529006958008, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 3.185840707964602e-06, | |
| "logits/chosen": -2.261589527130127, | |
| "logits/rejected": -2.234139919281006, | |
| "logps/chosen": -341.8447265625, | |
| "logps/rejected": -361.2301330566406, | |
| "loss": 2.3698, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -19.69499397277832, | |
| "rewards/margins": 1.1049805879592896, | |
| "rewards/rejected": -20.799976348876953, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.070796460176992e-06, | |
| "logits/chosen": -2.282593250274658, | |
| "logits/rejected": -2.219956874847412, | |
| "logps/chosen": -333.1883850097656, | |
| "logps/rejected": -323.2119140625, | |
| "loss": 2.3553, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -19.006275177001953, | |
| "rewards/margins": 0.894936203956604, | |
| "rewards/rejected": -19.90121078491211, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.955752212389381e-06, | |
| "logits/chosen": -2.2947192192077637, | |
| "logits/rejected": -2.191793918609619, | |
| "logps/chosen": -327.3343200683594, | |
| "logps/rejected": -302.55914306640625, | |
| "loss": 3.0721, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -18.95311164855957, | |
| "rewards/margins": 0.02760167047381401, | |
| "rewards/rejected": -18.980712890625, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 5.840707964601771e-06, | |
| "logits/chosen": -2.1300625801086426, | |
| "logits/rejected": -2.197695255279541, | |
| "logps/chosen": -296.19586181640625, | |
| "logps/rejected": -322.7232360839844, | |
| "loss": 2.5242, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -18.607830047607422, | |
| "rewards/margins": 0.5492460131645203, | |
| "rewards/rejected": -19.15707778930664, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 6.72566371681416e-06, | |
| "logits/chosen": -2.191436290740967, | |
| "logits/rejected": -2.203051805496216, | |
| "logps/chosen": -322.64581298828125, | |
| "logps/rejected": -318.93475341796875, | |
| "loss": 2.4603, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -18.32453155517578, | |
| "rewards/margins": 1.1701295375823975, | |
| "rewards/rejected": -19.494661331176758, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 7.610619469026549e-06, | |
| "logits/chosen": -2.3291049003601074, | |
| "logits/rejected": -2.13211727142334, | |
| "logps/chosen": -351.888671875, | |
| "logps/rejected": -316.0814514160156, | |
| "loss": 4.3049, | |
| "rewards/accuracies": 0.375, | |
| "rewards/chosen": -20.648174285888672, | |
| "rewards/margins": -1.917999505996704, | |
| "rewards/rejected": -18.730175018310547, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 8.495575221238938e-06, | |
| "logits/chosen": -2.326770782470703, | |
| "logits/rejected": -2.2708096504211426, | |
| "logps/chosen": -319.8079528808594, | |
| "logps/rejected": -325.22467041015625, | |
| "loss": 2.9022, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -17.211898803710938, | |
| "rewards/margins": 0.4395485818386078, | |
| "rewards/rejected": -17.651447296142578, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 9.380530973451329e-06, | |
| "logits/chosen": -2.2947869300842285, | |
| "logits/rejected": -2.2642266750335693, | |
| "logps/chosen": -319.7033386230469, | |
| "logps/rejected": -301.95684814453125, | |
| "loss": 2.9535, | |
| "rewards/accuracies": 0.32499998807907104, | |
| "rewards/chosen": -18.591039657592773, | |
| "rewards/margins": -1.4626668691635132, | |
| "rewards/rejected": -17.128376007080078, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 9.999951373555555e-06, | |
| "logits/chosen": -2.356776475906372, | |
| "logits/rejected": -2.2779877185821533, | |
| "logps/chosen": -332.5343322753906, | |
| "logps/rejected": -308.6272888183594, | |
| "loss": 2.8838, | |
| "rewards/accuracies": 0.42500001192092896, | |
| "rewards/chosen": -18.02423095703125, | |
| "rewards/margins": -0.5776697993278503, | |
| "rewards/rejected": -17.446561813354492, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 9.999086929743288e-06, | |
| "logits/chosen": -2.34501314163208, | |
| "logits/rejected": -2.3048901557922363, | |
| "logps/chosen": -298.5960388183594, | |
| "logps/rejected": -309.3174743652344, | |
| "loss": 2.0696, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -16.59781265258789, | |
| "rewards/margins": 0.7586337327957153, | |
| "rewards/rejected": -17.356447219848633, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 9.997142113313472e-06, | |
| "logits/chosen": -2.3136909008026123, | |
| "logits/rejected": -2.3042447566986084, | |
| "logps/chosen": -292.8536071777344, | |
| "logps/rejected": -281.0971984863281, | |
| "loss": 1.8399, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -15.984518051147461, | |
| "rewards/margins": 0.30002641677856445, | |
| "rewards/rejected": -16.284543991088867, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 9.994117344568142e-06, | |
| "logits/chosen": -2.337782144546509, | |
| "logits/rejected": -2.3470942974090576, | |
| "logps/chosen": -286.35504150390625, | |
| "logps/rejected": -303.07684326171875, | |
| "loss": 1.5656, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -14.58277416229248, | |
| "rewards/margins": 1.030444860458374, | |
| "rewards/rejected": -15.61322021484375, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 9.990013277202137e-06, | |
| "logits/chosen": -2.3595287799835205, | |
| "logits/rejected": -2.4950690269470215, | |
| "logps/chosen": -292.61651611328125, | |
| "logps/rejected": -363.38507080078125, | |
| "loss": 1.523, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -15.285211563110352, | |
| "rewards/margins": 2.0152671337127686, | |
| "rewards/rejected": -17.300477981567383, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 9.984830798161828e-06, | |
| "logits/chosen": -2.4216346740722656, | |
| "logits/rejected": -2.35921311378479, | |
| "logps/chosen": -329.1554870605469, | |
| "logps/rejected": -308.78326416015625, | |
| "loss": 2.5844, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -14.768890380859375, | |
| "rewards/margins": -0.32357311248779297, | |
| "rewards/rejected": -14.445318222045898, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 9.978571027453433e-06, | |
| "logits/chosen": -2.5200698375701904, | |
| "logits/rejected": -2.338383674621582, | |
| "logps/chosen": -296.1730041503906, | |
| "logps/rejected": -232.0618896484375, | |
| "loss": 2.4226, | |
| "rewards/accuracies": 0.375, | |
| "rewards/chosen": -13.72007942199707, | |
| "rewards/margins": -0.8905000686645508, | |
| "rewards/rejected": -12.829577445983887, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 9.971235317900968e-06, | |
| "logits/chosen": -2.4042282104492188, | |
| "logits/rejected": -2.4900546073913574, | |
| "logps/chosen": -219.2891845703125, | |
| "logps/rejected": -247.385498046875, | |
| "loss": 1.5221, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -11.559672355651855, | |
| "rewards/margins": 0.2930552363395691, | |
| "rewards/rejected": -11.852727890014648, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 9.962825254853888e-06, | |
| "logits/chosen": -2.591836929321289, | |
| "logits/rejected": -2.5101170539855957, | |
| "logps/chosen": -311.3710632324219, | |
| "logps/rejected": -277.0614318847656, | |
| "loss": 2.1722, | |
| "rewards/accuracies": 0.32499998807907104, | |
| "rewards/chosen": -13.245725631713867, | |
| "rewards/margins": -1.0529097318649292, | |
| "rewards/rejected": -12.192815780639648, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 9.954339123272747e-06, | |
| "logits/chosen": -2.5649514198303223, | |
| "logits/rejected": -2.4265828132629395, | |
| "logps/chosen": -250.44009399414062, | |
| "logps/rejected": -228.14224243164062, | |
| "loss": 1.4704, | |
| "rewards/accuracies": 0.32499998807907104, | |
| "rewards/chosen": -10.943647384643555, | |
| "rewards/margins": -0.3656729757785797, | |
| "rewards/rejected": -10.577974319458008, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 9.943892987470688e-06, | |
| "logits/chosen": -2.559394598007202, | |
| "logits/rejected": -2.523345470428467, | |
| "logps/chosen": -260.9962463378906, | |
| "logps/rejected": -234.96670532226562, | |
| "loss": 1.709, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -10.393632888793945, | |
| "rewards/margins": 0.1549229919910431, | |
| "rewards/rejected": -10.548555374145508, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 9.932378407234108e-06, | |
| "logits/chosen": -2.63352632522583, | |
| "logits/rejected": -2.5623555183410645, | |
| "logps/chosen": -271.7388916015625, | |
| "logps/rejected": -272.16796875, | |
| "loss": 1.2704, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -9.603178024291992, | |
| "rewards/margins": 0.19006821513175964, | |
| "rewards/rejected": -9.793245315551758, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 9.919797871024877e-06, | |
| "logits/chosen": -2.6439247131347656, | |
| "logits/rejected": -2.6053879261016846, | |
| "logps/chosen": -229.23764038085938, | |
| "logps/rejected": -197.8614044189453, | |
| "loss": 1.405, | |
| "rewards/accuracies": 0.375, | |
| "rewards/chosen": -9.295554161071777, | |
| "rewards/margins": -0.5798273086547852, | |
| "rewards/rejected": -8.715726852416992, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 9.906154097672858e-06, | |
| "logits/chosen": -2.6798043251037598, | |
| "logits/rejected": -2.600550889968872, | |
| "logps/chosen": -235.1671142578125, | |
| "logps/rejected": -223.978271484375, | |
| "loss": 1.2942, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -8.195772171020508, | |
| "rewards/margins": -0.00971608143299818, | |
| "rewards/rejected": -8.186057090759277, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 9.89145003578833e-06, | |
| "logits/chosen": -2.670474052429199, | |
| "logits/rejected": -2.6329426765441895, | |
| "logps/chosen": -224.05068969726562, | |
| "logps/rejected": -207.1922607421875, | |
| "loss": 1.0877, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -7.675335884094238, | |
| "rewards/margins": 0.17977333068847656, | |
| "rewards/rejected": -7.855108737945557, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 9.875688863124766e-06, | |
| "logits/chosen": -2.620087146759033, | |
| "logits/rejected": -2.676790714263916, | |
| "logps/chosen": -255.08486938476562, | |
| "logps/rejected": -265.8028564453125, | |
| "loss": 1.0495, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -7.672966957092285, | |
| "rewards/margins": 0.1472960114479065, | |
| "rewards/rejected": -7.8202619552612305, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 9.858873985892058e-06, | |
| "logits/chosen": -2.6771128177642822, | |
| "logits/rejected": -2.5845065116882324, | |
| "logps/chosen": -222.91311645507812, | |
| "logps/rejected": -234.68359375, | |
| "loss": 1.0752, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -6.951257228851318, | |
| "rewards/margins": -0.008678942918777466, | |
| "rewards/rejected": -6.9425787925720215, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 9.841009038020401e-06, | |
| "logits/chosen": -2.6333932876586914, | |
| "logits/rejected": -2.65295147895813, | |
| "logps/chosen": -204.25399780273438, | |
| "logps/rejected": -208.4911651611328, | |
| "loss": 1.0669, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -6.73724365234375, | |
| "rewards/margins": 0.05855642631649971, | |
| "rewards/rejected": -6.795799255371094, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 9.82209788037494e-06, | |
| "logits/chosen": -2.685725450515747, | |
| "logits/rejected": -2.700352907180786, | |
| "logps/chosen": -230.539794921875, | |
| "logps/rejected": -240.39224243164062, | |
| "loss": 1.1248, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -6.4564642906188965, | |
| "rewards/margins": -0.2118469774723053, | |
| "rewards/rejected": -6.244616985321045, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 9.80214459992139e-06, | |
| "logits/chosen": -2.714470624923706, | |
| "logits/rejected": -2.6982994079589844, | |
| "logps/chosen": -214.0612030029297, | |
| "logps/rejected": -231.0535125732422, | |
| "loss": 0.8095, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -6.09361457824707, | |
| "rewards/margins": 0.35767459869384766, | |
| "rewards/rejected": -6.45128870010376, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 9.781153508842785e-06, | |
| "logits/chosen": -2.6795332431793213, | |
| "logits/rejected": -2.6861202716827393, | |
| "logps/chosen": -191.6574249267578, | |
| "logps/rejected": -206.572998046875, | |
| "loss": 0.9054, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -6.0128302574157715, | |
| "rewards/margins": 0.5337953567504883, | |
| "rewards/rejected": -6.54662561416626, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 9.759129143607547e-06, | |
| "logits/chosen": -2.719517469406128, | |
| "logits/rejected": -2.630643367767334, | |
| "logps/chosen": -228.45797729492188, | |
| "logps/rejected": -176.00814819335938, | |
| "loss": 1.1571, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -6.262964725494385, | |
| "rewards/margins": -0.4287610650062561, | |
| "rewards/rejected": -5.834203243255615, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 9.736076263989103e-06, | |
| "logits/chosen": -2.753007650375366, | |
| "logits/rejected": -2.7196168899536133, | |
| "logps/chosen": -234.21731567382812, | |
| "logps/rejected": -214.3049774169922, | |
| "loss": 0.93, | |
| "rewards/accuracies": 0.42500001192092896, | |
| "rewards/chosen": -5.743313789367676, | |
| "rewards/margins": 0.08766243606805801, | |
| "rewards/rejected": -5.830975532531738, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 9.711999852037226e-06, | |
| "logits/chosen": -2.702094554901123, | |
| "logits/rejected": -2.6643381118774414, | |
| "logps/chosen": -235.38766479492188, | |
| "logps/rejected": -208.2432861328125, | |
| "loss": 1.1837, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -5.757768154144287, | |
| "rewards/margins": -0.27699437737464905, | |
| "rewards/rejected": -5.480773448944092, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 9.68690511100134e-06, | |
| "logits/chosen": -2.6954503059387207, | |
| "logits/rejected": -2.6649551391601562, | |
| "logps/chosen": -185.06394958496094, | |
| "logps/rejected": -187.76278686523438, | |
| "loss": 1.0071, | |
| "rewards/accuracies": 0.42500001192092896, | |
| "rewards/chosen": -5.9798102378845215, | |
| "rewards/margins": -0.11689682304859161, | |
| "rewards/rejected": -5.862914085388184, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 9.660797464206035e-06, | |
| "logits/chosen": -2.6881985664367676, | |
| "logits/rejected": -2.676832914352417, | |
| "logps/chosen": -195.05517578125, | |
| "logps/rejected": -212.87161254882812, | |
| "loss": 0.6422, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -4.617544174194336, | |
| "rewards/margins": 0.8601192235946655, | |
| "rewards/rejected": -5.477663516998291, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 9.633682553879e-06, | |
| "logits/chosen": -2.749539852142334, | |
| "logits/rejected": -2.7113490104675293, | |
| "logps/chosen": -173.92945861816406, | |
| "logps/rejected": -176.216796875, | |
| "loss": 0.8915, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -5.114466667175293, | |
| "rewards/margins": 0.10453431308269501, | |
| "rewards/rejected": -5.219000816345215, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 9.605566239931666e-06, | |
| "logits/chosen": -2.744715690612793, | |
| "logits/rejected": -2.6837120056152344, | |
| "logps/chosen": -200.80999755859375, | |
| "logps/rejected": -200.7525177001953, | |
| "loss": 0.633, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -4.719931602478027, | |
| "rewards/margins": 0.698486864566803, | |
| "rewards/rejected": -5.418419361114502, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 9.576454598692797e-06, | |
| "logits/chosen": -2.7422823905944824, | |
| "logits/rejected": -2.7130322456359863, | |
| "logps/chosen": -204.26626586914062, | |
| "logps/rejected": -174.83802795410156, | |
| "loss": 0.9281, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -4.756241798400879, | |
| "rewards/margins": -0.038588762283325195, | |
| "rewards/rejected": -4.717652320861816, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 9.546353921595306e-06, | |
| "logits/chosen": -2.7594494819641113, | |
| "logits/rejected": -2.7436954975128174, | |
| "logps/chosen": -183.6326141357422, | |
| "logps/rejected": -186.80911254882812, | |
| "loss": 0.9906, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -4.743472099304199, | |
| "rewards/margins": -0.156986802816391, | |
| "rewards/rejected": -4.586484909057617, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 9.515270713816589e-06, | |
| "logits/chosen": -2.762357711791992, | |
| "logits/rejected": -2.661778688430786, | |
| "logps/chosen": -212.29739379882812, | |
| "logps/rejected": -185.29476928710938, | |
| "loss": 0.9206, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -5.090394973754883, | |
| "rewards/margins": 0.17267219722270966, | |
| "rewards/rejected": -5.263067722320557, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 9.483211692872669e-06, | |
| "logits/chosen": -2.694725513458252, | |
| "logits/rejected": -2.689701557159424, | |
| "logps/chosen": -168.6083221435547, | |
| "logps/rejected": -170.26681518554688, | |
| "loss": 0.9479, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -4.593288421630859, | |
| "rewards/margins": -0.16782906651496887, | |
| "rewards/rejected": -4.425459384918213, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 9.450183787166447e-06, | |
| "logits/chosen": -2.6913774013519287, | |
| "logits/rejected": -2.780381202697754, | |
| "logps/chosen": -141.98934936523438, | |
| "logps/rejected": -177.6278076171875, | |
| "loss": 0.9904, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -4.094004154205322, | |
| "rewards/margins": -0.12103526294231415, | |
| "rewards/rejected": -3.972968578338623, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 9.41619413449037e-06, | |
| "logits/chosen": -2.804361581802368, | |
| "logits/rejected": -2.7710132598876953, | |
| "logps/chosen": -209.9197540283203, | |
| "logps/rejected": -231.4965057373047, | |
| "loss": 0.654, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -3.7489547729492188, | |
| "rewards/margins": 0.5616118311882019, | |
| "rewards/rejected": -4.3105669021606445, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 9.381250080483864e-06, | |
| "logits/chosen": -2.777339458465576, | |
| "logits/rejected": -2.7908101081848145, | |
| "logps/chosen": -197.44711303710938, | |
| "logps/rejected": -195.8129425048828, | |
| "loss": 0.8654, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -4.063180923461914, | |
| "rewards/margins": 0.1730591356754303, | |
| "rewards/rejected": -4.236240386962891, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 9.345359177045827e-06, | |
| "logits/chosen": -2.7428901195526123, | |
| "logits/rejected": -2.720733642578125, | |
| "logps/chosen": -163.38687133789062, | |
| "logps/rejected": -152.174072265625, | |
| "loss": 1.008, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -4.339611053466797, | |
| "rewards/margins": -0.002607667353004217, | |
| "rewards/rejected": -4.337003707885742, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "learning_rate": 9.308529180702568e-06, | |
| "logits/chosen": -2.771120309829712, | |
| "logits/rejected": -2.754432201385498, | |
| "logps/chosen": -190.38487243652344, | |
| "logps/rejected": -209.5969696044922, | |
| "loss": 0.9381, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -4.276428699493408, | |
| "rewards/margins": 0.031873930245637894, | |
| "rewards/rejected": -4.308302879333496, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 9.270768050931515e-06, | |
| "logits/chosen": -2.729900360107422, | |
| "logits/rejected": -2.793795108795166, | |
| "logps/chosen": -181.68646240234375, | |
| "logps/rejected": -203.8788299560547, | |
| "loss": 0.9827, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -4.365941524505615, | |
| "rewards/margins": -0.1641651839017868, | |
| "rewards/rejected": -4.201776504516602, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 9.232083948441046e-06, | |
| "logits/chosen": -2.7761735916137695, | |
| "logits/rejected": -2.7046539783477783, | |
| "logps/chosen": -190.8777618408203, | |
| "logps/rejected": -169.68423461914062, | |
| "loss": 0.7403, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -4.126107215881348, | |
| "rewards/margins": 0.2456977367401123, | |
| "rewards/rejected": -4.371805191040039, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 9.192485233406862e-06, | |
| "logits/chosen": -2.788799524307251, | |
| "logits/rejected": -2.8254306316375732, | |
| "logps/chosen": -204.0353240966797, | |
| "logps/rejected": -216.4750518798828, | |
| "loss": 0.6348, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -3.6200859546661377, | |
| "rewards/margins": 0.6306756138801575, | |
| "rewards/rejected": -4.250761985778809, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 9.151980463665227e-06, | |
| "logits/chosen": -2.7755134105682373, | |
| "logits/rejected": -2.7311973571777344, | |
| "logps/chosen": -215.2248077392578, | |
| "logps/rejected": -178.1062469482422, | |
| "loss": 0.9884, | |
| "rewards/accuracies": 0.2750000059604645, | |
| "rewards/chosen": -3.970677137374878, | |
| "rewards/margins": -0.2024170607328415, | |
| "rewards/rejected": -3.7682597637176514, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 9.1105783928635e-06, | |
| "logits/chosen": -2.7572436332702637, | |
| "logits/rejected": -2.720371961593628, | |
| "logps/chosen": -203.22486877441406, | |
| "logps/rejected": -213.2503662109375, | |
| "loss": 0.8808, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -4.16678524017334, | |
| "rewards/margins": 0.12414976209402084, | |
| "rewards/rejected": -4.2909345626831055, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 9.068287968568355e-06, | |
| "logits/chosen": -2.7487785816192627, | |
| "logits/rejected": -2.724555253982544, | |
| "logps/chosen": -175.81295776367188, | |
| "logps/rejected": -203.91702270507812, | |
| "loss": 0.7847, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -3.4349968433380127, | |
| "rewards/margins": 0.22362789511680603, | |
| "rewards/rejected": -3.6586246490478516, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 9.02511833033208e-06, | |
| "logits/chosen": -2.6728549003601074, | |
| "logits/rejected": -2.7027556896209717, | |
| "logps/chosen": -174.01890563964844, | |
| "logps/rejected": -172.8794403076172, | |
| "loss": 0.8984, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -4.354010105133057, | |
| "rewards/margins": -0.13735604286193848, | |
| "rewards/rejected": -4.216653823852539, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 8.981078807717396e-06, | |
| "logits/chosen": -2.780517578125, | |
| "logits/rejected": -2.6801159381866455, | |
| "logps/chosen": -230.1298370361328, | |
| "logps/rejected": -203.07168579101562, | |
| "loss": 0.625, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -3.319823741912842, | |
| "rewards/margins": 0.6677559614181519, | |
| "rewards/rejected": -3.987579822540283, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 8.936178918281209e-06, | |
| "logits/chosen": -2.799701690673828, | |
| "logits/rejected": -2.815525770187378, | |
| "logps/chosen": -205.35971069335938, | |
| "logps/rejected": -223.64096069335938, | |
| "loss": 0.8421, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -3.762897491455078, | |
| "rewards/margins": 0.13321921229362488, | |
| "rewards/rejected": -3.8961167335510254, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 8.890428365517728e-06, | |
| "logits/chosen": -2.8051438331604004, | |
| "logits/rejected": -2.7885632514953613, | |
| "logps/chosen": -197.3937530517578, | |
| "logps/rejected": -187.13601684570312, | |
| "loss": 0.7381, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -3.4687581062316895, | |
| "rewards/margins": 0.25684064626693726, | |
| "rewards/rejected": -3.7255985736846924, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 8.843837036761404e-06, | |
| "logits/chosen": -2.7467944622039795, | |
| "logits/rejected": -2.7005391120910645, | |
| "logps/chosen": -152.84494018554688, | |
| "logps/rejected": -160.13241577148438, | |
| "loss": 0.7662, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -3.732775926589966, | |
| "rewards/margins": 0.06671512126922607, | |
| "rewards/rejected": -3.7994911670684814, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 8.796415001050154e-06, | |
| "logits/chosen": -2.7716736793518066, | |
| "logits/rejected": -2.7501323223114014, | |
| "logps/chosen": -221.6551513671875, | |
| "logps/rejected": -201.65664672851562, | |
| "loss": 0.8448, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -3.700239896774292, | |
| "rewards/margins": 0.12370122969150543, | |
| "rewards/rejected": -3.823941469192505, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 8.748172506949274e-06, | |
| "logits/chosen": -2.7913918495178223, | |
| "logits/rejected": -2.7350287437438965, | |
| "logps/chosen": -168.91790771484375, | |
| "logps/rejected": -155.0623321533203, | |
| "loss": 0.5851, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -3.5637214183807373, | |
| "rewards/margins": 0.42019423842430115, | |
| "rewards/rejected": -3.9839158058166504, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 8.699119980336602e-06, | |
| "logits/chosen": -2.7849667072296143, | |
| "logits/rejected": -2.771721601486206, | |
| "logps/chosen": -192.60813903808594, | |
| "logps/rejected": -206.09646606445312, | |
| "loss": 0.9976, | |
| "rewards/accuracies": 0.32499998807907104, | |
| "rewards/chosen": -3.9602694511413574, | |
| "rewards/margins": -0.18327102065086365, | |
| "rewards/rejected": -3.776998519897461, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 8.649268022149333e-06, | |
| "logits/chosen": -2.7933568954467773, | |
| "logits/rejected": -2.7272696495056152, | |
| "logps/chosen": -179.9084930419922, | |
| "logps/rejected": -169.74490356445312, | |
| "loss": 0.68, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -3.221353054046631, | |
| "rewards/margins": 0.3395439684391022, | |
| "rewards/rejected": -3.5608971118927, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 8.59862740609301e-06, | |
| "logits/chosen": -2.7812328338623047, | |
| "logits/rejected": -2.8435587882995605, | |
| "logps/chosen": -209.0635223388672, | |
| "logps/rejected": -244.5385284423828, | |
| "loss": 0.6579, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -3.0288193225860596, | |
| "rewards/margins": 0.4765963554382324, | |
| "rewards/rejected": -3.505415678024292, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 8.547209076313172e-06, | |
| "logits/chosen": -2.8104701042175293, | |
| "logits/rejected": -2.7969369888305664, | |
| "logps/chosen": -206.7493896484375, | |
| "logps/rejected": -246.7706298828125, | |
| "loss": 0.7068, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -3.3913025856018066, | |
| "rewards/margins": 0.37605711817741394, | |
| "rewards/rejected": -3.767359495162964, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 8.495024145030174e-06, | |
| "logits/chosen": -2.743499279022217, | |
| "logits/rejected": -2.7557623386383057, | |
| "logps/chosen": -173.17481994628906, | |
| "logps/rejected": -186.05215454101562, | |
| "loss": 0.6764, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -3.472040891647339, | |
| "rewards/margins": 0.3358023464679718, | |
| "rewards/rejected": -3.807842969894409, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 8.442083890137678e-06, | |
| "logits/chosen": -2.8170254230499268, | |
| "logits/rejected": -2.760282516479492, | |
| "logps/chosen": -173.0248565673828, | |
| "logps/rejected": -184.1920623779297, | |
| "loss": 0.8291, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -3.5644659996032715, | |
| "rewards/margins": 0.05758289247751236, | |
| "rewards/rejected": -3.622048854827881, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 8.388399752765344e-06, | |
| "logits/chosen": -2.773528814315796, | |
| "logits/rejected": -2.758387327194214, | |
| "logps/chosen": -204.7705078125, | |
| "logps/rejected": -200.41160583496094, | |
| "loss": 0.8527, | |
| "rewards/accuracies": 0.42500001192092896, | |
| "rewards/chosen": -3.599118709564209, | |
| "rewards/margins": 0.017948562279343605, | |
| "rewards/rejected": -3.617067337036133, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 8.333983334806248e-06, | |
| "logits/chosen": -2.8039369583129883, | |
| "logits/rejected": -2.7655069828033447, | |
| "logps/chosen": -192.72186279296875, | |
| "logps/rejected": -173.8263702392578, | |
| "loss": 0.8641, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -3.713822603225708, | |
| "rewards/margins": -0.06442561000585556, | |
| "rewards/rejected": -3.6493968963623047, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 8.278846396409534e-06, | |
| "logits/chosen": -2.797102451324463, | |
| "logits/rejected": -2.7584991455078125, | |
| "logps/chosen": -195.19786071777344, | |
| "logps/rejected": -184.8418731689453, | |
| "loss": 0.7849, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -3.548352003097534, | |
| "rewards/margins": 0.16000667214393616, | |
| "rewards/rejected": -3.7083587646484375, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 8.223000853438904e-06, | |
| "logits/chosen": -2.8177175521850586, | |
| "logits/rejected": -2.7559008598327637, | |
| "logps/chosen": -218.43588256835938, | |
| "logps/rejected": -219.35946655273438, | |
| "loss": 0.7455, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -3.5060417652130127, | |
| "rewards/margins": 0.2477506697177887, | |
| "rewards/rejected": -3.7537918090820312, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 8.166458774897413e-06, | |
| "logits/chosen": -2.7866969108581543, | |
| "logits/rejected": -2.7426235675811768, | |
| "logps/chosen": -196.8046417236328, | |
| "logps/rejected": -180.7646484375, | |
| "loss": 0.6577, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -3.515568494796753, | |
| "rewards/margins": 0.46584025025367737, | |
| "rewards/rejected": -3.9814085960388184, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 8.109232380319194e-06, | |
| "logits/chosen": -2.781240940093994, | |
| "logits/rejected": -2.7888545989990234, | |
| "logps/chosen": -232.93215942382812, | |
| "logps/rejected": -232.01644897460938, | |
| "loss": 0.7337, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -3.5336124897003174, | |
| "rewards/margins": 0.2553574740886688, | |
| "rewards/rejected": -3.7889697551727295, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 8.051334037128661e-06, | |
| "logits/chosen": -2.7906103134155273, | |
| "logits/rejected": -2.742318630218506, | |
| "logps/chosen": -170.14791870117188, | |
| "logps/rejected": -173.310791015625, | |
| "loss": 0.828, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -3.540756940841675, | |
| "rewards/margins": 0.037487827241420746, | |
| "rewards/rejected": -3.578244686126709, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 7.99277625796771e-06, | |
| "logits/chosen": -2.7460989952087402, | |
| "logits/rejected": -2.710388660430908, | |
| "logps/chosen": -164.4999542236328, | |
| "logps/rejected": -171.73757934570312, | |
| "loss": 0.8343, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -3.3666107654571533, | |
| "rewards/margins": 0.040531255304813385, | |
| "rewards/rejected": -3.407141923904419, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 7.933571697991582e-06, | |
| "logits/chosen": -2.830110549926758, | |
| "logits/rejected": -2.7687745094299316, | |
| "logps/chosen": -210.4406280517578, | |
| "logps/rejected": -182.27137756347656, | |
| "loss": 0.8217, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -3.588493824005127, | |
| "rewards/margins": -0.05328698828816414, | |
| "rewards/rejected": -3.5352070331573486, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 7.873733152133898e-06, | |
| "logits/chosen": -2.751688241958618, | |
| "logits/rejected": -2.7940192222595215, | |
| "logps/chosen": -153.90414428710938, | |
| "logps/rejected": -158.2861328125, | |
| "loss": 0.8625, | |
| "rewards/accuracies": 0.42500001192092896, | |
| "rewards/chosen": -3.453404664993286, | |
| "rewards/margins": -0.1084330826997757, | |
| "rewards/rejected": -3.3449714183807373, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 7.813273552341496e-06, | |
| "logits/chosen": -2.7797484397888184, | |
| "logits/rejected": -2.775768995285034, | |
| "logps/chosen": -169.4456787109375, | |
| "logps/rejected": -177.5587921142578, | |
| "loss": 0.756, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -3.326707363128662, | |
| "rewards/margins": 0.25994253158569336, | |
| "rewards/rejected": -3.5866501331329346, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 7.75220596477966e-06, | |
| "logits/chosen": -2.7829766273498535, | |
| "logits/rejected": -2.7465267181396484, | |
| "logps/chosen": -164.01870727539062, | |
| "logps/rejected": -156.57614135742188, | |
| "loss": 0.6708, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -3.3400473594665527, | |
| "rewards/margins": 0.3525925874710083, | |
| "rewards/rejected": -3.6926398277282715, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 7.690543587008332e-06, | |
| "logits/chosen": -2.7533538341522217, | |
| "logits/rejected": -2.762204647064209, | |
| "logps/chosen": -221.1579132080078, | |
| "logps/rejected": -204.04983520507812, | |
| "loss": 0.8969, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -3.536458969116211, | |
| "rewards/margins": 0.05794559791684151, | |
| "rewards/rejected": -3.5944042205810547, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 7.628299745129943e-06, | |
| "logits/chosen": -2.7850310802459717, | |
| "logits/rejected": -2.756134510040283, | |
| "logps/chosen": -224.99118041992188, | |
| "logps/rejected": -199.38502502441406, | |
| "loss": 0.8558, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -3.7199528217315674, | |
| "rewards/margins": -0.056097112596035004, | |
| "rewards/rejected": -3.6638553142547607, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 7.565487890909448e-06, | |
| "logits/chosen": -2.8218209743499756, | |
| "logits/rejected": -2.775695323944092, | |
| "logps/chosen": -169.43869018554688, | |
| "logps/rejected": -147.3358612060547, | |
| "loss": 0.7543, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -3.0327506065368652, | |
| "rewards/margins": 0.10092975944280624, | |
| "rewards/rejected": -3.133680820465088, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 7.502121598867218e-06, | |
| "logits/chosen": -2.794593572616577, | |
| "logits/rejected": -2.8074254989624023, | |
| "logps/chosen": -191.18869018554688, | |
| "logps/rejected": -161.5567169189453, | |
| "loss": 0.702, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -3.0870423316955566, | |
| "rewards/margins": 0.3286024034023285, | |
| "rewards/rejected": -3.415644407272339, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 7.438214563345389e-06, | |
| "logits/chosen": -2.8384017944335938, | |
| "logits/rejected": -2.8303287029266357, | |
| "logps/chosen": -200.47872924804688, | |
| "logps/rejected": -202.9823760986328, | |
| "loss": 0.9219, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -2.9461796283721924, | |
| "rewards/margins": -0.013927942141890526, | |
| "rewards/rejected": -2.932251453399658, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 7.373780595548334e-06, | |
| "logits/chosen": -2.8200442790985107, | |
| "logits/rejected": -2.7595479488372803, | |
| "logps/chosen": -203.58987426757812, | |
| "logps/rejected": -193.07473754882812, | |
| "loss": 0.5825, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -2.7171919345855713, | |
| "rewards/margins": 0.6527736783027649, | |
| "rewards/rejected": -3.3699657917022705, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 7.3088336205578565e-06, | |
| "logits/chosen": -2.7865753173828125, | |
| "logits/rejected": -2.7725372314453125, | |
| "logps/chosen": -181.54159545898438, | |
| "logps/rejected": -192.08921813964844, | |
| "loss": 0.723, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -2.9962518215179443, | |
| "rewards/margins": 0.2384202927350998, | |
| "rewards/rejected": -3.2346718311309814, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 7.243387674323794e-06, | |
| "logits/chosen": -2.7999701499938965, | |
| "logits/rejected": -2.7826244831085205, | |
| "logps/chosen": -170.237548828125, | |
| "logps/rejected": -182.22238159179688, | |
| "loss": 0.7287, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -2.636094331741333, | |
| "rewards/margins": 0.3836243152618408, | |
| "rewards/rejected": -3.019718647003174, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 7.177456900630645e-06, | |
| "logits/chosen": -2.8270153999328613, | |
| "logits/rejected": -2.801821231842041, | |
| "logps/chosen": -169.65478515625, | |
| "logps/rejected": -149.74624633789062, | |
| "loss": 0.9289, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -2.8085739612579346, | |
| "rewards/margins": -0.13477511703968048, | |
| "rewards/rejected": -2.6737987995147705, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 7.111055548040911e-06, | |
| "logits/chosen": -2.843956708908081, | |
| "logits/rejected": -2.807281017303467, | |
| "logps/chosen": -204.63934326171875, | |
| "logps/rejected": -196.67213439941406, | |
| "loss": 0.7793, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -2.520915985107422, | |
| "rewards/margins": 0.05325014516711235, | |
| "rewards/rejected": -2.5741655826568604, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 7.044197966815773e-06, | |
| "logits/chosen": -2.8285329341888428, | |
| "logits/rejected": -2.735088348388672, | |
| "logps/chosen": -153.91452026367188, | |
| "logps/rejected": -138.55552673339844, | |
| "loss": 0.6409, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -2.84395694732666, | |
| "rewards/margins": 0.25751471519470215, | |
| "rewards/rejected": -3.1014719009399414, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 6.976898605813798e-06, | |
| "logits/chosen": -2.822996139526367, | |
| "logits/rejected": -2.8268377780914307, | |
| "logps/chosen": -167.09097290039062, | |
| "logps/rejected": -203.2536163330078, | |
| "loss": 0.8486, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -2.88107967376709, | |
| "rewards/margins": 0.07577097415924072, | |
| "rewards/rejected": -2.95685076713562, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 6.90917200936835e-06, | |
| "logits/chosen": -2.7948951721191406, | |
| "logits/rejected": -2.783585548400879, | |
| "logps/chosen": -145.66119384765625, | |
| "logps/rejected": -160.69918823242188, | |
| "loss": 0.8522, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -2.815007448196411, | |
| "rewards/margins": 0.01567123830318451, | |
| "rewards/rejected": -2.8306784629821777, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 6.841032814144345e-06, | |
| "logits/chosen": -2.7837324142456055, | |
| "logits/rejected": -2.7920632362365723, | |
| "logps/chosen": -150.3719940185547, | |
| "logps/rejected": -168.3992919921875, | |
| "loss": 0.661, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -2.577317476272583, | |
| "rewards/margins": 0.19722715020179749, | |
| "rewards/rejected": -2.7745444774627686, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 6.772495745975067e-06, | |
| "logits/chosen": -2.822993278503418, | |
| "logits/rejected": -2.793628454208374, | |
| "logps/chosen": -179.533447265625, | |
| "logps/rejected": -170.6478729248047, | |
| "loss": 0.6447, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -2.440274477005005, | |
| "rewards/margins": 0.4804176390171051, | |
| "rewards/rejected": -2.920691967010498, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 6.703575616679709e-06, | |
| "logits/chosen": -2.8847832679748535, | |
| "logits/rejected": -2.862794876098633, | |
| "logps/chosen": -203.72158813476562, | |
| "logps/rejected": -196.6941375732422, | |
| "loss": 0.6708, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -2.6080403327941895, | |
| "rewards/margins": 0.2483837604522705, | |
| "rewards/rejected": -2.856423854827881, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 6.634287320862334e-06, | |
| "logits/chosen": -2.8792309761047363, | |
| "logits/rejected": -2.7815871238708496, | |
| "logps/chosen": -189.05697631835938, | |
| "logps/rejected": -170.0454559326172, | |
| "loss": 0.7327, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -2.4347012042999268, | |
| "rewards/margins": 0.20491544902324677, | |
| "rewards/rejected": -2.6396164894104004, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 6.564645832692938e-06, | |
| "logits/chosen": -2.8398923873901367, | |
| "logits/rejected": -2.821370840072632, | |
| "logps/chosen": -162.66635131835938, | |
| "logps/rejected": -177.49655151367188, | |
| "loss": 0.6929, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -2.672360420227051, | |
| "rewards/margins": 0.2537608742713928, | |
| "rewards/rejected": -2.926121473312378, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 6.494666202671329e-06, | |
| "logits/chosen": -2.828071355819702, | |
| "logits/rejected": -2.7870450019836426, | |
| "logps/chosen": -175.61985778808594, | |
| "logps/rejected": -147.78115844726562, | |
| "loss": 0.9773, | |
| "rewards/accuracies": 0.3499999940395355, | |
| "rewards/chosen": -2.7835516929626465, | |
| "rewards/margins": -0.18404017388820648, | |
| "rewards/rejected": -2.59951114654541, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 6.424363554374496e-06, | |
| "logits/chosen": -2.8303914070129395, | |
| "logits/rejected": -2.8009707927703857, | |
| "logps/chosen": -184.24453735351562, | |
| "logps/rejected": -177.34475708007812, | |
| "loss": 0.8386, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -2.654391050338745, | |
| "rewards/margins": 0.08457916229963303, | |
| "rewards/rejected": -2.7389702796936035, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 6.353753081188194e-06, | |
| "logits/chosen": -2.8116297721862793, | |
| "logits/rejected": -2.8462095260620117, | |
| "logps/chosen": -154.33535766601562, | |
| "logps/rejected": -173.16500854492188, | |
| "loss": 0.8474, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -2.643991470336914, | |
| "rewards/margins": 0.021061301231384277, | |
| "rewards/rejected": -2.665052890777588, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 6.28285004302345e-06, | |
| "logits/chosen": -2.813953399658203, | |
| "logits/rejected": -2.7975823879241943, | |
| "logps/chosen": -157.3970489501953, | |
| "logps/rejected": -169.88925170898438, | |
| "loss": 0.7732, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -2.4602997303009033, | |
| "rewards/margins": 0.03091360628604889, | |
| "rewards/rejected": -2.491213321685791, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 6.2116697630186685e-06, | |
| "logits/chosen": -2.8499863147735596, | |
| "logits/rejected": -2.761946201324463, | |
| "logps/chosen": -179.04576110839844, | |
| "logps/rejected": -169.37741088867188, | |
| "loss": 0.7098, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -2.956383228302002, | |
| "rewards/margins": 0.20148436725139618, | |
| "rewards/rejected": -3.157867431640625, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 6.140227624228098e-06, | |
| "logits/chosen": -2.829965353012085, | |
| "logits/rejected": -2.7929511070251465, | |
| "logps/chosen": -188.39486694335938, | |
| "logps/rejected": -196.4234161376953, | |
| "loss": 0.8087, | |
| "rewards/accuracies": 0.42500001192092896, | |
| "rewards/chosen": -3.0763561725616455, | |
| "rewards/margins": 0.08712232112884521, | |
| "rewards/rejected": -3.163478374481201, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 6.068539066297331e-06, | |
| "logits/chosen": -2.820751428604126, | |
| "logits/rejected": -2.7950470447540283, | |
| "logps/chosen": -193.37313842773438, | |
| "logps/rejected": -182.583251953125, | |
| "loss": 0.7543, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -2.8425216674804688, | |
| "rewards/margins": 0.28575873374938965, | |
| "rewards/rejected": -3.1282806396484375, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 5.996619582126586e-06, | |
| "logits/chosen": -2.803011894226074, | |
| "logits/rejected": -2.792786121368408, | |
| "logps/chosen": -192.3000946044922, | |
| "logps/rejected": -199.48699951171875, | |
| "loss": 0.8482, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -3.1065292358398438, | |
| "rewards/margins": 0.10464553534984589, | |
| "rewards/rejected": -3.211174726486206, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 5.924484714522473e-06, | |
| "logits/chosen": -2.7662460803985596, | |
| "logits/rejected": -2.782365322113037, | |
| "logps/chosen": -188.29409790039062, | |
| "logps/rejected": -161.1019744873047, | |
| "loss": 0.6869, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -2.632988452911377, | |
| "rewards/margins": 0.358073890209198, | |
| "rewards/rejected": -2.9910624027252197, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 5.8521500528389685e-06, | |
| "logits/chosen": -2.8015599250793457, | |
| "logits/rejected": -2.7828166484832764, | |
| "logps/chosen": -175.898681640625, | |
| "logps/rejected": -171.10836791992188, | |
| "loss": 0.6562, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -2.830270767211914, | |
| "rewards/margins": 0.31004253029823303, | |
| "rewards/rejected": -3.1403133869171143, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 5.779631229608352e-06, | |
| "logits/chosen": -2.814619541168213, | |
| "logits/rejected": -2.7792232036590576, | |
| "logps/chosen": -183.1345672607422, | |
| "logps/rejected": -180.23687744140625, | |
| "loss": 0.629, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -2.774634599685669, | |
| "rewards/margins": 0.35699692368507385, | |
| "rewards/rejected": -3.13163161277771, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 5.706943917162786e-06, | |
| "logits/chosen": -2.8597893714904785, | |
| "logits/rejected": -2.7718067169189453, | |
| "logps/chosen": -187.40206909179688, | |
| "logps/rejected": -167.49673461914062, | |
| "loss": 0.7502, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -2.7406411170959473, | |
| "rewards/margins": 0.28711724281311035, | |
| "rewards/rejected": -3.0277581214904785, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 5.634103824247312e-06, | |
| "logits/chosen": -2.8031020164489746, | |
| "logits/rejected": -2.770418643951416, | |
| "logps/chosen": -166.7908935546875, | |
| "logps/rejected": -169.6998748779297, | |
| "loss": 0.7387, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -2.8097705841064453, | |
| "rewards/margins": 0.22710688412189484, | |
| "rewards/rejected": -3.036877393722534, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 5.561126692624963e-06, | |
| "logits/chosen": -2.8061394691467285, | |
| "logits/rejected": -2.8010807037353516, | |
| "logps/chosen": -207.2965850830078, | |
| "logps/rejected": -172.8783721923828, | |
| "loss": 0.9337, | |
| "rewards/accuracies": 0.375, | |
| "rewards/chosen": -3.002063035964966, | |
| "rewards/margins": -0.22279544174671173, | |
| "rewards/rejected": -2.7792675495147705, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 5.488028293674759e-06, | |
| "logits/chosen": -2.7473597526550293, | |
| "logits/rejected": -2.86487078666687, | |
| "logps/chosen": -145.13525390625, | |
| "logps/rejected": -196.2593231201172, | |
| "loss": 0.7441, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -2.414668321609497, | |
| "rewards/margins": 0.26904162764549255, | |
| "rewards/rejected": -2.6837100982666016, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 5.414824424983282e-06, | |
| "logits/chosen": -2.8032655715942383, | |
| "logits/rejected": -2.835156202316284, | |
| "logps/chosen": -174.91506958007812, | |
| "logps/rejected": -200.90908813476562, | |
| "loss": 0.8901, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -3.0149245262145996, | |
| "rewards/margins": -0.0794035792350769, | |
| "rewards/rejected": -2.935521364212036, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 5.341530906930604e-06, | |
| "logits/chosen": -2.849372386932373, | |
| "logits/rejected": -2.8182034492492676, | |
| "logps/chosen": -200.5182342529297, | |
| "logps/rejected": -165.98849487304688, | |
| "loss": 0.8282, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -2.665292263031006, | |
| "rewards/margins": 0.001919907284900546, | |
| "rewards/rejected": -2.6672122478485107, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 5.268163579271276e-06, | |
| "logits/chosen": -2.8164515495300293, | |
| "logits/rejected": -2.777838706970215, | |
| "logps/chosen": -158.1012420654297, | |
| "logps/rejected": -157.5447540283203, | |
| "loss": 0.5828, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -2.521636962890625, | |
| "rewards/margins": 0.5487133264541626, | |
| "rewards/rejected": -3.070350170135498, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 5.1947382977111374e-06, | |
| "logits/chosen": -2.806366443634033, | |
| "logits/rejected": -2.7530558109283447, | |
| "logps/chosen": -184.51922607421875, | |
| "logps/rejected": -184.57203674316406, | |
| "loss": 0.7037, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -2.7491421699523926, | |
| "rewards/margins": 0.25690436363220215, | |
| "rewards/rejected": -3.006046772003174, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 5.1212709304806774e-06, | |
| "logits/chosen": -2.837092399597168, | |
| "logits/rejected": -2.8233399391174316, | |
| "logps/chosen": -170.21676635742188, | |
| "logps/rejected": -175.38031005859375, | |
| "loss": 0.8019, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -2.616921901702881, | |
| "rewards/margins": 0.020748604089021683, | |
| "rewards/rejected": -2.6376705169677734, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 5.047777354905685e-06, | |
| "logits/chosen": -2.8479232788085938, | |
| "logits/rejected": -2.8166935443878174, | |
| "logps/chosen": -165.58253479003906, | |
| "logps/rejected": -178.3239288330078, | |
| "loss": 0.6336, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -2.5399913787841797, | |
| "rewards/margins": 0.3785735070705414, | |
| "rewards/rejected": -2.918564796447754, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.974273453975942e-06, | |
| "logits/chosen": -2.8627452850341797, | |
| "logits/rejected": -2.776982069015503, | |
| "logps/chosen": -198.88880920410156, | |
| "logps/rejected": -198.97647094726562, | |
| "loss": 0.6924, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -2.6049208641052246, | |
| "rewards/margins": 0.4274630546569824, | |
| "rewards/rejected": -3.032383918762207, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 4.900775112912684e-06, | |
| "logits/chosen": -2.7575716972351074, | |
| "logits/rejected": -2.7695202827453613, | |
| "logps/chosen": -151.79171752929688, | |
| "logps/rejected": -155.3359832763672, | |
| "loss": 0.6988, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -2.552367925643921, | |
| "rewards/margins": 0.287198007106781, | |
| "rewards/rejected": -2.8395657539367676, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.827298215735577e-06, | |
| "logits/chosen": -2.852078914642334, | |
| "logits/rejected": -2.7572147846221924, | |
| "logps/chosen": -183.82571411132812, | |
| "logps/rejected": -173.97152709960938, | |
| "loss": 0.8329, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -2.600367307662964, | |
| "rewards/margins": 0.07606508582830429, | |
| "rewards/rejected": -2.6764326095581055, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.75385864182997e-06, | |
| "logits/chosen": -2.801530361175537, | |
| "logits/rejected": -2.7251269817352295, | |
| "logps/chosen": -156.46092224121094, | |
| "logps/rejected": -130.04135131835938, | |
| "loss": 1.0341, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -3.1289448738098145, | |
| "rewards/margins": -0.38991624116897583, | |
| "rewards/rejected": -2.7390286922454834, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.680472262515123e-06, | |
| "logits/chosen": -2.8145370483398438, | |
| "logits/rejected": -2.8123114109039307, | |
| "logps/chosen": -132.849365234375, | |
| "logps/rejected": -148.0765838623047, | |
| "loss": 0.7382, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -2.4975039958953857, | |
| "rewards/margins": 0.32709741592407227, | |
| "rewards/rejected": -2.824601650238037, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.607154937614209e-06, | |
| "logits/chosen": -2.829385757446289, | |
| "logits/rejected": -2.8185107707977295, | |
| "logps/chosen": -173.2303466796875, | |
| "logps/rejected": -166.53912353515625, | |
| "loss": 0.6272, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -2.501225709915161, | |
| "rewards/margins": 0.4133445620536804, | |
| "rewards/rejected": -2.914569854736328, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.533922512026772e-06, | |
| "logits/chosen": -2.808532238006592, | |
| "logits/rejected": -2.8340225219726562, | |
| "logps/chosen": -156.80160522460938, | |
| "logps/rejected": -195.7024383544922, | |
| "loss": 0.8639, | |
| "rewards/accuracies": 0.42500001192092896, | |
| "rewards/chosen": -2.797833204269409, | |
| "rewards/margins": -0.15304505825042725, | |
| "rewards/rejected": -2.6447882652282715, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.4607908123044235e-06, | |
| "logits/chosen": -2.762845516204834, | |
| "logits/rejected": -2.80631685256958, | |
| "logps/chosen": -164.12916564941406, | |
| "logps/rejected": -168.5762481689453, | |
| "loss": 0.7842, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -2.7107739448547363, | |
| "rewards/margins": 0.14155347645282745, | |
| "rewards/rejected": -2.852327585220337, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 4.387775643230513e-06, | |
| "logits/chosen": -2.8005309104919434, | |
| "logits/rejected": -2.7626125812530518, | |
| "logps/chosen": -164.07000732421875, | |
| "logps/rejected": -161.9875946044922, | |
| "loss": 0.817, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -2.8267662525177, | |
| "rewards/margins": 0.035615064203739166, | |
| "rewards/rejected": -2.8623814582824707, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.3148927844044845e-06, | |
| "logits/chosen": -2.8500914573669434, | |
| "logits/rejected": -2.7665367126464844, | |
| "logps/chosen": -201.25502014160156, | |
| "logps/rejected": -181.4254608154297, | |
| "loss": 0.7916, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -2.674546241760254, | |
| "rewards/margins": 0.15138781070709229, | |
| "rewards/rejected": -2.8259339332580566, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 4.2421579868316835e-06, | |
| "logits/chosen": -2.871143341064453, | |
| "logits/rejected": -2.800727605819702, | |
| "logps/chosen": -213.4442901611328, | |
| "logps/rejected": -197.76211547851562, | |
| "loss": 0.7817, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -2.7138214111328125, | |
| "rewards/margins": 0.09940443933010101, | |
| "rewards/rejected": -2.8132259845733643, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.169586969519349e-06, | |
| "logits/chosen": -2.839171886444092, | |
| "logits/rejected": -2.7628910541534424, | |
| "logps/chosen": -185.0808563232422, | |
| "logps/rejected": -156.76638793945312, | |
| "loss": 0.8095, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -2.863227128982544, | |
| "rewards/margins": 0.06683845818042755, | |
| "rewards/rejected": -2.930065631866455, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 4.097195416079519e-06, | |
| "logits/chosen": -2.7593963146209717, | |
| "logits/rejected": -2.7466847896575928, | |
| "logps/chosen": -167.2697296142578, | |
| "logps/rejected": -151.8631134033203, | |
| "loss": 0.7299, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -2.843665361404419, | |
| "rewards/margins": 0.11098279803991318, | |
| "rewards/rejected": -2.954648494720459, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 4.024998971339572e-06, | |
| "logits/chosen": -2.819622039794922, | |
| "logits/rejected": -2.7979581356048584, | |
| "logps/chosen": -187.9668426513672, | |
| "logps/rejected": -220.19497680664062, | |
| "loss": 0.6732, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -2.7116570472717285, | |
| "rewards/margins": 0.3411490023136139, | |
| "rewards/rejected": -3.0528059005737305, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 3.953013237961169e-06, | |
| "logits/chosen": -2.8445045948028564, | |
| "logits/rejected": -2.7679443359375, | |
| "logps/chosen": -195.4940185546875, | |
| "logps/rejected": -221.0810546875, | |
| "loss": 0.6838, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -2.7768163681030273, | |
| "rewards/margins": 0.34462517499923706, | |
| "rewards/rejected": -3.12144136428833, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 3.8812537730683e-06, | |
| "logits/chosen": -2.7969844341278076, | |
| "logits/rejected": -2.824723482131958, | |
| "logps/chosen": -163.15548706054688, | |
| "logps/rejected": -178.32977294921875, | |
| "loss": 0.7611, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -2.8133432865142822, | |
| "rewards/margins": 0.08294029533863068, | |
| "rewards/rejected": -2.8962836265563965, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 3.80973608488517e-06, | |
| "logits/chosen": -2.7507481575012207, | |
| "logits/rejected": -2.7548985481262207, | |
| "logps/chosen": -138.450927734375, | |
| "logps/rejected": -146.4220733642578, | |
| "loss": 0.707, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -2.846163511276245, | |
| "rewards/margins": 0.16207213699817657, | |
| "rewards/rejected": -3.0082356929779053, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 3.73847562938465e-06, | |
| "logits/chosen": -2.8039064407348633, | |
| "logits/rejected": -2.735996723175049, | |
| "logps/chosen": -167.43582153320312, | |
| "logps/rejected": -138.5779571533203, | |
| "loss": 0.7448, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -2.9816246032714844, | |
| "rewards/margins": 0.11592672020196915, | |
| "rewards/rejected": -3.0975515842437744, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 3.6674878069480345e-06, | |
| "logits/chosen": -2.812595844268799, | |
| "logits/rejected": -2.8279619216918945, | |
| "logps/chosen": -207.8079376220703, | |
| "logps/rejected": -227.8889923095703, | |
| "loss": 0.6799, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -2.651724338531494, | |
| "rewards/margins": 0.2655607759952545, | |
| "rewards/rejected": -2.9172849655151367, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 3.5967879590368e-06, | |
| "logits/chosen": -2.8392462730407715, | |
| "logits/rejected": -2.80900239944458, | |
| "logps/chosen": -214.54019165039062, | |
| "logps/rejected": -240.7262420654297, | |
| "loss": 0.7693, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -2.56378436088562, | |
| "rewards/margins": 0.14276638627052307, | |
| "rewards/rejected": -2.7065505981445312, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 3.5263913648770974e-06, | |
| "logits/chosen": -2.7690839767456055, | |
| "logits/rejected": -2.796400785446167, | |
| "logps/chosen": -175.9673614501953, | |
| "logps/rejected": -169.49261474609375, | |
| "loss": 0.8555, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -3.1168508529663086, | |
| "rewards/margins": -0.02891675755381584, | |
| "rewards/rejected": -3.0879340171813965, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 3.456313238157697e-06, | |
| "logits/chosen": -2.8202641010284424, | |
| "logits/rejected": -2.775507688522339, | |
| "logps/chosen": -179.59512329101562, | |
| "logps/rejected": -172.36398315429688, | |
| "loss": 0.5841, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -2.501037120819092, | |
| "rewards/margins": 0.5284804105758667, | |
| "rewards/rejected": -3.029517412185669, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 3.3865687237421047e-06, | |
| "logits/chosen": -2.8022942543029785, | |
| "logits/rejected": -2.839221954345703, | |
| "logps/chosen": -178.23875427246094, | |
| "logps/rejected": -220.74722290039062, | |
| "loss": 0.7175, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -2.740363359451294, | |
| "rewards/margins": 0.14777664840221405, | |
| "rewards/rejected": -2.8881397247314453, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 3.317172894395541e-06, | |
| "logits/chosen": -2.8122408390045166, | |
| "logits/rejected": -2.7978949546813965, | |
| "logps/chosen": -193.39605712890625, | |
| "logps/rejected": -197.32260131835938, | |
| "loss": 0.7203, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -2.6169321537017822, | |
| "rewards/margins": 0.16434483230113983, | |
| "rewards/rejected": -2.7812769412994385, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 3.2481407475274995e-06, | |
| "logits/chosen": -2.812182664871216, | |
| "logits/rejected": -2.8286237716674805, | |
| "logps/chosen": -197.23226928710938, | |
| "logps/rejected": -221.1904754638672, | |
| "loss": 0.6232, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -2.409712314605713, | |
| "rewards/margins": 0.39808765053749084, | |
| "rewards/rejected": -2.8077995777130127, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 3.179487201950607e-06, | |
| "logits/chosen": -2.8621065616607666, | |
| "logits/rejected": -2.7421019077301025, | |
| "logps/chosen": -196.08909606933594, | |
| "logps/rejected": -170.71444702148438, | |
| "loss": 0.6596, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -2.5933759212493896, | |
| "rewards/margins": 0.4136047959327698, | |
| "rewards/rejected": -3.0069806575775146, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 3.111227094656457e-06, | |
| "logits/chosen": -2.8379364013671875, | |
| "logits/rejected": -2.763042688369751, | |
| "logps/chosen": -183.64736938476562, | |
| "logps/rejected": -172.20584106445312, | |
| "loss": 0.6305, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -2.426516056060791, | |
| "rewards/margins": 0.491415411233902, | |
| "rewards/rejected": -2.917931079864502, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 3.0433751776091243e-06, | |
| "logits/chosen": -2.772272825241089, | |
| "logits/rejected": -2.797128200531006, | |
| "logps/chosen": -172.43173217773438, | |
| "logps/rejected": -167.80349731445312, | |
| "loss": 0.7952, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -2.848965644836426, | |
| "rewards/margins": 0.02570332959294319, | |
| "rewards/rejected": -2.874669075012207, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 2.9759461145570562e-06, | |
| "logits/chosen": -2.8297877311706543, | |
| "logits/rejected": -2.812828302383423, | |
| "logps/chosen": -154.22589111328125, | |
| "logps/rejected": -165.05397033691406, | |
| "loss": 0.7833, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -2.5259437561035156, | |
| "rewards/margins": 0.10835230350494385, | |
| "rewards/rejected": -2.63429594039917, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 2.9089544778640434e-06, | |
| "logits/chosen": -2.8057451248168945, | |
| "logits/rejected": -2.8399128913879395, | |
| "logps/chosen": -188.55502319335938, | |
| "logps/rejected": -195.39517211914062, | |
| "loss": 0.7464, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -2.9316458702087402, | |
| "rewards/margins": 0.07946301996707916, | |
| "rewards/rejected": -3.0111091136932373, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 2.84241474535992e-06, | |
| "logits/chosen": -2.8304762840270996, | |
| "logits/rejected": -2.805596351623535, | |
| "logps/chosen": -173.1647491455078, | |
| "logps/rejected": -199.00692749023438, | |
| "loss": 0.7088, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -2.439711570739746, | |
| "rewards/margins": 0.4066222608089447, | |
| "rewards/rejected": -2.8463339805603027, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 2.7763412972117e-06, | |
| "logits/chosen": -2.7816245555877686, | |
| "logits/rejected": -2.7993526458740234, | |
| "logps/chosen": -163.3373260498047, | |
| "logps/rejected": -181.728515625, | |
| "loss": 0.7681, | |
| "rewards/accuracies": 0.375, | |
| "rewards/chosen": -2.659446954727173, | |
| "rewards/margins": 0.027357567101716995, | |
| "rewards/rejected": -2.6868045330047607, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 2.7107484128158257e-06, | |
| "logits/chosen": -2.834667205810547, | |
| "logits/rejected": -2.820902109146118, | |
| "logps/chosen": -164.51974487304688, | |
| "logps/rejected": -185.0774688720703, | |
| "loss": 0.922, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -2.854031562805176, | |
| "rewards/margins": -0.12820354104042053, | |
| "rewards/rejected": -2.7258284091949463, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 2.6456502677121955e-06, | |
| "logits/chosen": -2.8490092754364014, | |
| "logits/rejected": -2.7348694801330566, | |
| "logps/chosen": -170.228271484375, | |
| "logps/rejected": -161.79847717285156, | |
| "loss": 0.5544, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -2.4652602672576904, | |
| "rewards/margins": 0.6136289834976196, | |
| "rewards/rejected": -3.0788893699645996, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 2.5810609305206187e-06, | |
| "logits/chosen": -2.8274359703063965, | |
| "logits/rejected": -2.808927059173584, | |
| "logps/chosen": -186.00840759277344, | |
| "logps/rejected": -195.20870971679688, | |
| "loss": 0.7217, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -2.6177451610565186, | |
| "rewards/margins": 0.2328224927186966, | |
| "rewards/rejected": -2.850567579269409, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 2.516994359900401e-06, | |
| "logits/chosen": -2.792170286178589, | |
| "logits/rejected": -2.78896164894104, | |
| "logps/chosen": -162.92263793945312, | |
| "logps/rejected": -163.746337890625, | |
| "loss": 0.679, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -2.576962947845459, | |
| "rewards/margins": 0.2438470423221588, | |
| "rewards/rejected": -2.820809841156006, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 2.4534644015336767e-06, | |
| "logits/chosen": -2.8285109996795654, | |
| "logits/rejected": -2.8009676933288574, | |
| "logps/chosen": -194.52418518066406, | |
| "logps/rejected": -220.3380889892578, | |
| "loss": 0.8011, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -2.766613245010376, | |
| "rewards/margins": 0.1788259595632553, | |
| "rewards/rejected": -2.945439338684082, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 2.3904847851331697e-06, | |
| "logits/chosen": -2.887859582901001, | |
| "logits/rejected": -2.8012211322784424, | |
| "logps/chosen": -182.5015106201172, | |
| "logps/rejected": -169.3479461669922, | |
| "loss": 0.564, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -2.494227886199951, | |
| "rewards/margins": 0.6495568156242371, | |
| "rewards/rejected": -3.143784999847412, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 2.3280691214750024e-06, | |
| "logits/chosen": -2.787199020385742, | |
| "logits/rejected": -2.776179075241089, | |
| "logps/chosen": -155.46151733398438, | |
| "logps/rejected": -158.6356658935547, | |
| "loss": 0.7875, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -2.8484859466552734, | |
| "rewards/margins": 0.10379151999950409, | |
| "rewards/rejected": -2.952277660369873, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 2.266230899457221e-06, | |
| "logits/chosen": -2.8023529052734375, | |
| "logits/rejected": -2.7784194946289062, | |
| "logps/chosen": -142.9674530029297, | |
| "logps/rejected": -157.74266052246094, | |
| "loss": 0.7626, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -2.6773407459259033, | |
| "rewards/margins": 0.07623550295829773, | |
| "rewards/rejected": -2.7535762786865234, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 2.2049834831846565e-06, | |
| "logits/chosen": -2.811038017272949, | |
| "logits/rejected": -2.7344298362731934, | |
| "logps/chosen": -157.92982482910156, | |
| "logps/rejected": -150.5963134765625, | |
| "loss": 0.8051, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -2.59382963180542, | |
| "rewards/margins": 0.03338869288563728, | |
| "rewards/rejected": -2.627218723297119, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 2.1443401090807537e-06, | |
| "logits/chosen": -2.741464138031006, | |
| "logits/rejected": -2.8247230052948, | |
| "logps/chosen": -147.42611694335938, | |
| "logps/rejected": -194.6453857421875, | |
| "loss": 0.7755, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -2.945338249206543, | |
| "rewards/margins": 0.02999408170580864, | |
| "rewards/rejected": -2.975332260131836, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 2.084313883026991e-06, | |
| "logits/chosen": -2.8469436168670654, | |
| "logits/rejected": -2.809089183807373, | |
| "logps/chosen": -173.6985626220703, | |
| "logps/rejected": -163.20140075683594, | |
| "loss": 0.6061, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -2.3887710571289062, | |
| "rewards/margins": 0.4348909854888916, | |
| "rewards/rejected": -2.823662281036377, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 2.024917777530528e-06, | |
| "logits/chosen": -2.7014293670654297, | |
| "logits/rejected": -2.7902514934539795, | |
| "logps/chosen": -143.2289581298828, | |
| "logps/rejected": -153.96127319335938, | |
| "loss": 0.7187, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -2.667060375213623, | |
| "rewards/margins": 0.1122710332274437, | |
| "rewards/rejected": -2.779331684112549, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 1.9661646289206498e-06, | |
| "logits/chosen": -2.809936761856079, | |
| "logits/rejected": -2.7888851165771484, | |
| "logps/chosen": -176.75888061523438, | |
| "logps/rejected": -198.2320098876953, | |
| "loss": 0.7429, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -2.7881858348846436, | |
| "rewards/margins": 0.06710895150899887, | |
| "rewards/rejected": -2.855294704437256, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 1.9080671345746665e-06, | |
| "logits/chosen": -2.808663845062256, | |
| "logits/rejected": -2.772554874420166, | |
| "logps/chosen": -164.91775512695312, | |
| "logps/rejected": -183.65895080566406, | |
| "loss": 0.7546, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -3.0473217964172363, | |
| "rewards/margins": 0.20765431225299835, | |
| "rewards/rejected": -3.254976272583008, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 1.8506378501738287e-06, | |
| "logits/chosen": -2.8200621604919434, | |
| "logits/rejected": -2.7924609184265137, | |
| "logps/chosen": -181.00875854492188, | |
| "logps/rejected": -172.7202911376953, | |
| "loss": 0.7263, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -2.486166000366211, | |
| "rewards/margins": 0.3062500059604645, | |
| "rewards/rejected": -2.7924158573150635, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 1.7938891869898745e-06, | |
| "logits/chosen": -2.815359592437744, | |
| "logits/rejected": -2.8368589878082275, | |
| "logps/chosen": -160.1107940673828, | |
| "logps/rejected": -195.07188415527344, | |
| "loss": 0.6861, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -2.496129274368286, | |
| "rewards/margins": 0.23404176533222198, | |
| "rewards/rejected": -2.730170726776123, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 1.737833409202771e-06, | |
| "logits/chosen": -2.802074670791626, | |
| "logits/rejected": -2.820376396179199, | |
| "logps/chosen": -184.555908203125, | |
| "logps/rejected": -223.18679809570312, | |
| "loss": 0.8133, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -2.6580727100372314, | |
| "rewards/margins": 0.08000078052282333, | |
| "rewards/rejected": -2.7380733489990234, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 1.68248263125026e-06, | |
| "logits/chosen": -2.874215841293335, | |
| "logits/rejected": -2.8056039810180664, | |
| "logps/chosen": -179.6226043701172, | |
| "logps/rejected": -156.4929962158203, | |
| "loss": 0.7329, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -2.454094409942627, | |
| "rewards/margins": 0.17153581976890564, | |
| "rewards/rejected": -2.6256299018859863, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 1.6278488152097561e-06, | |
| "logits/chosen": -2.8101634979248047, | |
| "logits/rejected": -2.8042900562286377, | |
| "logps/chosen": -183.9718475341797, | |
| "logps/rejected": -174.69107055664062, | |
| "loss": 0.6246, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -2.7623867988586426, | |
| "rewards/margins": 0.38758453726768494, | |
| "rewards/rejected": -3.1499714851379395, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 1.5739437682131781e-06, | |
| "logits/chosen": -2.787309169769287, | |
| "logits/rejected": -2.8263907432556152, | |
| "logps/chosen": -142.33966064453125, | |
| "logps/rejected": -159.15042114257812, | |
| "loss": 0.7638, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -2.6954445838928223, | |
| "rewards/margins": 0.011251891031861305, | |
| "rewards/rejected": -2.7066962718963623, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 1.5207791398952566e-06, | |
| "logits/chosen": -2.7666656970977783, | |
| "logits/rejected": -2.8552584648132324, | |
| "logps/chosen": -154.00985717773438, | |
| "logps/rejected": -186.39874267578125, | |
| "loss": 0.8005, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -2.607551097869873, | |
| "rewards/margins": 0.07413578033447266, | |
| "rewards/rejected": -2.681687116622925, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 1.468366419875889e-06, | |
| "logits/chosen": -2.8431591987609863, | |
| "logits/rejected": -2.71547269821167, | |
| "logps/chosen": -160.5503692626953, | |
| "logps/rejected": -144.08738708496094, | |
| "loss": 0.6574, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -2.6430201530456543, | |
| "rewards/margins": 0.27612167596817017, | |
| "rewards/rejected": -2.9191417694091797, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 1.4167169352770705e-06, | |
| "logits/chosen": -2.7907159328460693, | |
| "logits/rejected": -2.819542169570923, | |
| "logps/chosen": -137.74073791503906, | |
| "logps/rejected": -165.55447387695312, | |
| "loss": 0.6817, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -2.555119514465332, | |
| "rewards/margins": 0.26977163553237915, | |
| "rewards/rejected": -2.8248908519744873, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 1.3658418482749447e-06, | |
| "logits/chosen": -2.830343246459961, | |
| "logits/rejected": -2.8575193881988525, | |
| "logps/chosen": -214.9622344970703, | |
| "logps/rejected": -234.50558471679688, | |
| "loss": 0.8771, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -2.599214553833008, | |
| "rewards/margins": 0.006308627314865589, | |
| "rewards/rejected": -2.6055233478546143, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 1.315752153687505e-06, | |
| "logits/chosen": -2.8138668537139893, | |
| "logits/rejected": -2.8360681533813477, | |
| "logps/chosen": -178.30795288085938, | |
| "logps/rejected": -187.0048828125, | |
| "loss": 0.7562, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -2.6052770614624023, | |
| "rewards/margins": 0.23422375321388245, | |
| "rewards/rejected": -2.839500904083252, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 1.2664586765984532e-06, | |
| "logits/chosen": -2.8095240592956543, | |
| "logits/rejected": -2.7641987800598145, | |
| "logps/chosen": -156.68167114257812, | |
| "logps/rejected": -164.3984375, | |
| "loss": 0.7838, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -2.9786505699157715, | |
| "rewards/margins": 0.023244787007570267, | |
| "rewards/rejected": -3.0018954277038574, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 1.2179720700177523e-06, | |
| "logits/chosen": -2.8782966136932373, | |
| "logits/rejected": -2.7959225177764893, | |
| "logps/chosen": -200.2810821533203, | |
| "logps/rejected": -164.7145233154297, | |
| "loss": 0.7743, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -2.700646162033081, | |
| "rewards/margins": 0.11083149909973145, | |
| "rewards/rejected": -2.8114776611328125, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 1.1703028125793609e-06, | |
| "logits/chosen": -2.888286590576172, | |
| "logits/rejected": -2.8809831142425537, | |
| "logps/chosen": -193.85348510742188, | |
| "logps/rejected": -202.87399291992188, | |
| "loss": 0.6969, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -2.6780223846435547, | |
| "rewards/margins": 0.20563840866088867, | |
| "rewards/rejected": -2.8836607933044434, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 1.123461206276657e-06, | |
| "logits/chosen": -2.812075138092041, | |
| "logits/rejected": -2.818878650665283, | |
| "logps/chosen": -172.5928192138672, | |
| "logps/rejected": -193.533203125, | |
| "loss": 0.5796, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -2.632546901702881, | |
| "rewards/margins": 0.5040704011917114, | |
| "rewards/rejected": -3.136617422103882, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 1.0774573742360272e-06, | |
| "logits/chosen": -2.8028206825256348, | |
| "logits/rejected": -2.736189365386963, | |
| "logps/chosen": -169.78982543945312, | |
| "logps/rejected": -176.23049926757812, | |
| "loss": 0.82, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -2.736199378967285, | |
| "rewards/margins": 0.049911629408597946, | |
| "rewards/rejected": -2.786111354827881, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 1.0323012585291215e-06, | |
| "logits/chosen": -2.79705810546875, | |
| "logits/rejected": -2.7956995964050293, | |
| "logps/chosen": -174.64675903320312, | |
| "logps/rejected": -165.0634307861328, | |
| "loss": 0.8287, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -2.620288372039795, | |
| "rewards/margins": -0.024537205696105957, | |
| "rewards/rejected": -2.5957512855529785, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 9.880026180242303e-07, | |
| "logits/chosen": -2.8729324340820312, | |
| "logits/rejected": -2.7513458728790283, | |
| "logps/chosen": -192.78225708007812, | |
| "logps/rejected": -231.4573211669922, | |
| "loss": 0.5742, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -2.3998770713806152, | |
| "rewards/margins": 0.719444751739502, | |
| "rewards/rejected": -3.1193220615386963, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 9.445710262772595e-07, | |
| "logits/chosen": -2.814854145050049, | |
| "logits/rejected": -2.84086275100708, | |
| "logps/chosen": -174.31187438964844, | |
| "logps/rejected": -205.2336883544922, | |
| "loss": 0.7557, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -2.8106675148010254, | |
| "rewards/margins": 0.03317015990614891, | |
| "rewards/rejected": -2.8438377380371094, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 9.020158694627468e-07, | |
| "logits/chosen": -2.811493396759033, | |
| "logits/rejected": -2.804260730743408, | |
| "logps/chosen": -151.4232940673828, | |
| "logps/rejected": -166.83543395996094, | |
| "loss": 0.709, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -2.587904691696167, | |
| "rewards/margins": 0.19390352070331573, | |
| "rewards/rejected": -2.781808376312256, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 8.603463443453847e-07, | |
| "logits/chosen": -2.761857271194458, | |
| "logits/rejected": -2.814115047454834, | |
| "logps/chosen": -166.73175048828125, | |
| "logps/rejected": -221.6857147216797, | |
| "loss": 0.6743, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -2.802481174468994, | |
| "rewards/margins": 0.3902018368244171, | |
| "rewards/rejected": -3.1926827430725098, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 8.195714562924634e-07, | |
| "logits/chosen": -2.832383632659912, | |
| "logits/rejected": -2.8002467155456543, | |
| "logps/chosen": -173.4875946044922, | |
| "logps/rejected": -165.5963134765625, | |
| "loss": 0.7844, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -2.961275100708008, | |
| "rewards/margins": 0.021215522661805153, | |
| "rewards/rejected": -2.9824907779693604, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 7.797000173276897e-07, | |
| "logits/chosen": -2.8296289443969727, | |
| "logits/rejected": -2.8257927894592285, | |
| "logps/chosen": -153.4661102294922, | |
| "logps/rejected": -173.27731323242188, | |
| "loss": 0.7625, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -2.775959014892578, | |
| "rewards/margins": 0.10331162065267563, | |
| "rewards/rejected": -2.8792707920074463, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 7.407406442267823e-07, | |
| "logits/chosen": -2.8164515495300293, | |
| "logits/rejected": -2.7793452739715576, | |
| "logps/chosen": -191.849365234375, | |
| "logps/rejected": -159.09506225585938, | |
| "loss": 0.7604, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -2.8845598697662354, | |
| "rewards/margins": 0.1206502914428711, | |
| "rewards/rejected": -3.0052101612091064, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 7.027017566552719e-07, | |
| "logits/chosen": -2.832385778427124, | |
| "logits/rejected": -2.7720022201538086, | |
| "logps/chosen": -167.87684631347656, | |
| "logps/rejected": -161.11094665527344, | |
| "loss": 0.8388, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -2.6847903728485107, | |
| "rewards/margins": -0.003799390746280551, | |
| "rewards/rejected": -2.680990695953369, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 6.655915753488862e-07, | |
| "logits/chosen": -2.862640857696533, | |
| "logits/rejected": -2.8090598583221436, | |
| "logps/chosen": -167.6973876953125, | |
| "logps/rejected": -173.8731231689453, | |
| "loss": 0.7996, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -2.4128682613372803, | |
| "rewards/margins": 0.12912854552268982, | |
| "rewards/rejected": -2.541997194290161, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 6.294181203369382e-07, | |
| "logits/chosen": -2.8509562015533447, | |
| "logits/rejected": -2.8277828693389893, | |
| "logps/chosen": -195.5679473876953, | |
| "logps/rejected": -203.61373901367188, | |
| "loss": 0.6741, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -2.731907367706299, | |
| "rewards/margins": 0.2566409111022949, | |
| "rewards/rejected": -2.988548517227173, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 5.941892092090839e-07, | |
| "logits/chosen": -2.860921859741211, | |
| "logits/rejected": -2.8239235877990723, | |
| "logps/chosen": -201.4016876220703, | |
| "logps/rejected": -207.51223754882812, | |
| "loss": 0.841, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -2.865007162094116, | |
| "rewards/margins": 0.011128646321594715, | |
| "rewards/rejected": -2.87613582611084, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 5.599124554258372e-07, | |
| "logits/chosen": -2.8042502403259277, | |
| "logits/rejected": -2.7808499336242676, | |
| "logps/chosen": -179.14642333984375, | |
| "logps/rejected": -210.1792755126953, | |
| "loss": 0.8441, | |
| "rewards/accuracies": 0.42500001192092896, | |
| "rewards/chosen": -2.6821835041046143, | |
| "rewards/margins": 0.053558606654405594, | |
| "rewards/rejected": -2.7357418537139893, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 5.265952666731833e-07, | |
| "logits/chosen": -2.8422186374664307, | |
| "logits/rejected": -2.789271831512451, | |
| "logps/chosen": -203.44155883789062, | |
| "logps/rejected": -182.6937255859375, | |
| "loss": 0.6031, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -2.501049757003784, | |
| "rewards/margins": 0.4275892674922943, | |
| "rewards/rejected": -2.9286391735076904, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 4.942448432616864e-07, | |
| "logits/chosen": -2.848710775375366, | |
| "logits/rejected": -2.824723720550537, | |
| "logps/chosen": -170.21578979492188, | |
| "logps/rejected": -177.10244750976562, | |
| "loss": 0.7331, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -2.5962746143341064, | |
| "rewards/margins": 0.18858730792999268, | |
| "rewards/rejected": -2.7848620414733887, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 4.6286817657039296e-07, | |
| "logits/chosen": -2.81172513961792, | |
| "logits/rejected": -2.800938129425049, | |
| "logps/chosen": -151.9397735595703, | |
| "logps/rejected": -164.53866577148438, | |
| "loss": 0.8516, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -2.7378716468811035, | |
| "rewards/margins": -0.09113232791423798, | |
| "rewards/rejected": -2.6467392444610596, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 4.3247204753590564e-07, | |
| "logits/chosen": -2.8164124488830566, | |
| "logits/rejected": -2.7626748085021973, | |
| "logps/chosen": -177.814697265625, | |
| "logps/rejected": -178.20785522460938, | |
| "loss": 0.6974, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -2.6230568885803223, | |
| "rewards/margins": 0.3184908926486969, | |
| "rewards/rejected": -2.9415478706359863, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 4.030630251869144e-07, | |
| "logits/chosen": -2.821056604385376, | |
| "logits/rejected": -2.767781972885132, | |
| "logps/chosen": -179.95680236816406, | |
| "logps/rejected": -197.38258361816406, | |
| "loss": 0.6036, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -2.667468547821045, | |
| "rewards/margins": 0.4341276288032532, | |
| "rewards/rejected": -3.1015961170196533, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 3.74647465224553e-07, | |
| "logits/chosen": -2.858065128326416, | |
| "logits/rejected": -2.826904058456421, | |
| "logps/chosen": -219.012451171875, | |
| "logps/rejected": -217.1422576904297, | |
| "loss": 0.6656, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -2.7656092643737793, | |
| "rewards/margins": 0.31034404039382935, | |
| "rewards/rejected": -3.075953245162964, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 3.4723150864883257e-07, | |
| "logits/chosen": -2.8574299812316895, | |
| "logits/rejected": -2.8202450275421143, | |
| "logps/chosen": -188.3092498779297, | |
| "logps/rejected": -222.70651245117188, | |
| "loss": 0.7819, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -2.619265079498291, | |
| "rewards/margins": 0.1903989017009735, | |
| "rewards/rejected": -2.809664011001587, | |
| "step": 2000 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 2250, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "total_flos": 0.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |