| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 1319, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.000758150113722517, |
| "grad_norm": 210.55908519352158, |
| "learning_rate": 0.0, |
| "logits/chosen": 0.103515625, |
| "logits/rejected": -0.091796875, |
| "logps/chosen": -268.0, |
| "logps/rejected": -424.0, |
| "loss": 0.6914, |
| "nll_loss": 0.7265625, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": 0.0, |
| "rewards/margins": 0.0, |
| "rewards/rejected": 0.0, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0075815011372251705, |
| "grad_norm": 241.994037571191, |
| "learning_rate": 3.4090909090909086e-08, |
| "logits/chosen": 0.0464274100959301, |
| "logits/rejected": -0.0338541679084301, |
| "logps/chosen": -718.888916015625, |
| "logps/rejected": -731.7777709960938, |
| "loss": 0.6853, |
| "nll_loss": 1.5737847089767456, |
| "rewards/accuracies": 0.2916666567325592, |
| "rewards/chosen": -0.0027330187149345875, |
| "rewards/margins": 0.0465325266122818, |
| "rewards/rejected": -0.0492689348757267, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.015163002274450341, |
| "grad_norm": 219.94111891737467, |
| "learning_rate": 7.196969696969697e-08, |
| "logits/chosen": 0.04271240159869194, |
| "logits/rejected": -0.11677245795726776, |
| "logps/chosen": -529.0, |
| "logps/rejected": -526.5999755859375, |
| "loss": 0.7042, |
| "nll_loss": 1.3855469226837158, |
| "rewards/accuracies": 0.25, |
| "rewards/chosen": -0.0008987426990643144, |
| "rewards/margins": -0.004055785946547985, |
| "rewards/rejected": 0.00311279296875, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.022744503411675512, |
| "grad_norm": 226.72439288143673, |
| "learning_rate": 1.0984848484848484e-07, |
| "logits/chosen": 0.06171875074505806, |
| "logits/rejected": -0.10219726711511612, |
| "logps/chosen": -562.0, |
| "logps/rejected": -548.7999877929688, |
| "loss": 0.7261, |
| "nll_loss": 1.4132812023162842, |
| "rewards/accuracies": 0.15000000596046448, |
| "rewards/chosen": -0.02811889722943306, |
| "rewards/margins": -0.05191650241613388, |
| "rewards/rejected": 0.02377929724752903, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.030326004548900682, |
| "grad_norm": 278.9678714919932, |
| "learning_rate": 1.4772727272727272e-07, |
| "logits/chosen": 0.09233398735523224, |
| "logits/rejected": 0.05045165866613388, |
| "logps/chosen": -507.6000061035156, |
| "logps/rejected": -500.8999938964844, |
| "loss": 0.6926, |
| "nll_loss": 1.4015624523162842, |
| "rewards/accuracies": 0.22499999403953552, |
| "rewards/chosen": 0.003143310546875, |
| "rewards/margins": 0.0072265625931322575, |
| "rewards/rejected": -0.0040679932571947575, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.03790750568612585, |
| "grad_norm": 290.4234528231276, |
| "learning_rate": 1.856060606060606e-07, |
| "logits/chosen": 0.07624511420726776, |
| "logits/rejected": -0.01950683631002903, |
| "logps/chosen": -615.5999755859375, |
| "logps/rejected": -609.0, |
| "loss": 0.7079, |
| "nll_loss": 1.257421851158142, |
| "rewards/accuracies": 0.3125, |
| "rewards/chosen": -0.02750244177877903, |
| "rewards/margins": -0.009429931640625, |
| "rewards/rejected": -0.018157958984375, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.045489006823351025, |
| "grad_norm": 188.43845642237048, |
| "learning_rate": 2.2348484848484846e-07, |
| "logits/chosen": 0.04985351487994194, |
| "logits/rejected": -0.03656005859375, |
| "logps/chosen": -567.7999877929688, |
| "logps/rejected": -537.0, |
| "loss": 0.6684, |
| "nll_loss": 1.212499976158142, |
| "rewards/accuracies": 0.3125, |
| "rewards/chosen": 0.02692871168255806, |
| "rewards/margins": 0.07955016940832138, |
| "rewards/rejected": -0.05264892429113388, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.05307050796057619, |
| "grad_norm": 232.64144162428008, |
| "learning_rate": 2.6136363636363634e-07, |
| "logits/chosen": 0.11154785007238388, |
| "logits/rejected": -0.06640625, |
| "logps/chosen": -487.0, |
| "logps/rejected": -482.3999938964844, |
| "loss": 0.65, |
| "nll_loss": 1.366796851158142, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": 0.10163573920726776, |
| "rewards/margins": 0.10463867336511612, |
| "rewards/rejected": -0.0027893066871911287, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.060652009097801364, |
| "grad_norm": 192.2740828794497, |
| "learning_rate": 2.9924242424242425e-07, |
| "logits/chosen": 0.01751098595559597, |
| "logits/rejected": -0.09003905951976776, |
| "logps/chosen": -620.7999877929688, |
| "logps/rejected": -595.5999755859375, |
| "loss": 0.6192, |
| "nll_loss": 1.321874976158142, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": 0.17548827826976776, |
| "rewards/margins": 0.23996582627296448, |
| "rewards/rejected": -0.06424560397863388, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.06823351023502654, |
| "grad_norm": 146.481409622457, |
| "learning_rate": 3.371212121212121e-07, |
| "logits/chosen": 0.14125975966453552, |
| "logits/rejected": -0.03948974609375, |
| "logps/chosen": -443.20001220703125, |
| "logps/rejected": -513.7999877929688, |
| "loss": 0.5497, |
| "nll_loss": 1.1339843273162842, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": 0.3358398377895355, |
| "rewards/margins": 0.4330078065395355, |
| "rewards/rejected": -0.09690551459789276, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.0758150113722517, |
| "grad_norm": 227.90269927612718, |
| "learning_rate": 3.75e-07, |
| "logits/chosen": 0.15236815810203552, |
| "logits/rejected": -0.06716308742761612, |
| "logps/chosen": -483.20001220703125, |
| "logps/rejected": -490.3999938964844, |
| "loss": 0.5727, |
| "nll_loss": 1.2472655773162842, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": 0.44428712129592896, |
| "rewards/margins": 0.4510253965854645, |
| "rewards/rejected": -0.006976318545639515, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.08339651250947688, |
| "grad_norm": 156.4379588057543, |
| "learning_rate": 4.1287878787878786e-07, |
| "logits/chosen": 0.05620117112994194, |
| "logits/rejected": -0.04985351487994194, |
| "logps/chosen": -581.2000122070312, |
| "logps/rejected": -519.2000122070312, |
| "loss": 0.4982, |
| "nll_loss": 1.1378905773162842, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": 0.47028809785842896, |
| "rewards/margins": 0.6709960699081421, |
| "rewards/rejected": -0.20065002143383026, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.09097801364670205, |
| "grad_norm": 105.91864437535659, |
| "learning_rate": 4.507575757575757e-07, |
| "logits/chosen": 0.11386718600988388, |
| "logits/rejected": -0.05363769456744194, |
| "logps/chosen": -446.6000061035156, |
| "logps/rejected": -519.5999755859375, |
| "loss": 0.5331, |
| "nll_loss": 1.158203125, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": 0.4749999940395355, |
| "rewards/margins": 0.6854003667831421, |
| "rewards/rejected": -0.21049194037914276, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.09855951478392722, |
| "grad_norm": 163.62052291292468, |
| "learning_rate": 4.886363636363636e-07, |
| "logits/chosen": 0.04210205003619194, |
| "logits/rejected": -0.01055908203125, |
| "logps/chosen": -513.2000122070312, |
| "logps/rejected": -441.79998779296875, |
| "loss": 0.5658, |
| "nll_loss": 1.200781226158142, |
| "rewards/accuracies": 0.574999988079071, |
| "rewards/chosen": 0.21035155653953552, |
| "rewards/margins": 0.517773449420929, |
| "rewards/rejected": -0.3070312440395355, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.10614101592115238, |
| "grad_norm": 99.07353233355678, |
| "learning_rate": 4.970513900589722e-07, |
| "logits/chosen": 0.02568359300494194, |
| "logits/rejected": -0.09825439751148224, |
| "logps/chosen": -674.0, |
| "logps/rejected": -668.0, |
| "loss": 0.5372, |
| "nll_loss": 1.2999999523162842, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": 0.5035644769668579, |
| "rewards/margins": 0.765917956829071, |
| "rewards/rejected": -0.2628173828125, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.11372251705837756, |
| "grad_norm": 202.594954494209, |
| "learning_rate": 4.928390901432181e-07, |
| "logits/chosen": 0.137481689453125, |
| "logits/rejected": 0.03671874850988388, |
| "logps/chosen": -447.0, |
| "logps/rejected": -395.79998779296875, |
| "loss": 0.5368, |
| "nll_loss": 1.240625023841858, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": 0.985156238079071, |
| "rewards/margins": 0.844433605670929, |
| "rewards/rejected": 0.13916015625, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.12130401819560273, |
| "grad_norm": 126.84406592888543, |
| "learning_rate": 4.886267902274642e-07, |
| "logits/chosen": 0.081298828125, |
| "logits/rejected": -0.04389648512005806, |
| "logps/chosen": -450.0, |
| "logps/rejected": -492.0, |
| "loss": 0.5079, |
| "nll_loss": 1.365625023841858, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 1.095312476158142, |
| "rewards/margins": 0.84423828125, |
| "rewards/rejected": 0.25, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.1288855193328279, |
| "grad_norm": 151.66256652761416, |
| "learning_rate": 4.844144903117102e-07, |
| "logits/chosen": 0.11357422173023224, |
| "logits/rejected": -0.02805175818502903, |
| "logps/chosen": -413.79998779296875, |
| "logps/rejected": -412.79998779296875, |
| "loss": 0.4046, |
| "nll_loss": 1.167578101158142, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": 1.037500023841858, |
| "rewards/margins": 1.275781273841858, |
| "rewards/rejected": -0.23802490532398224, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.13646702047005307, |
| "grad_norm": 159.18227673830674, |
| "learning_rate": 4.802021903959561e-07, |
| "logits/chosen": 0.05300293117761612, |
| "logits/rejected": -0.04742431640625, |
| "logps/chosen": -607.7999877929688, |
| "logps/rejected": -567.0, |
| "loss": 0.4766, |
| "nll_loss": 1.204687476158142, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": 0.9892578125, |
| "rewards/margins": 1.1962890625, |
| "rewards/rejected": -0.20742186903953552, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.14404852160727824, |
| "grad_norm": 81.59362392185547, |
| "learning_rate": 4.759898904802022e-07, |
| "logits/chosen": 0.11760254204273224, |
| "logits/rejected": -0.06528320163488388, |
| "logps/chosen": -487.79998779296875, |
| "logps/rejected": -446.6000061035156, |
| "loss": 0.5373, |
| "nll_loss": 1.3742187023162842, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": 0.6246887445449829, |
| "rewards/margins": 1.1572265625, |
| "rewards/rejected": -0.5337585210800171, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.1516300227445034, |
| "grad_norm": 247.23828828724723, |
| "learning_rate": 4.7177759056444814e-07, |
| "logits/chosen": 0.17337647080421448, |
| "logits/rejected": 0.041778564453125, |
| "logps/chosen": -546.2000122070312, |
| "logps/rejected": -551.4000244140625, |
| "loss": 0.5237, |
| "nll_loss": 1.338281273841858, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": 0.5738281011581421, |
| "rewards/margins": 0.996289074420929, |
| "rewards/rejected": -0.4232421815395355, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.15921152388172857, |
| "grad_norm": 177.0807309921808, |
| "learning_rate": 4.6756529064869416e-07, |
| "logits/chosen": 0.1591796875, |
| "logits/rejected": 0.11337890475988388, |
| "logps/chosen": -491.70001220703125, |
| "logps/rejected": -469.6000061035156, |
| "loss": 0.4459, |
| "nll_loss": 1.226171851158142, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": 1.101953148841858, |
| "rewards/margins": 1.295312523841858, |
| "rewards/rejected": -0.19340820610523224, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.16679302501895377, |
| "grad_norm": 140.05095261979244, |
| "learning_rate": 4.633529907329402e-07, |
| "logits/chosen": 0.09412841498851776, |
| "logits/rejected": -0.07822265475988388, |
| "logps/chosen": -696.7999877929688, |
| "logps/rejected": -695.2000122070312, |
| "loss": 0.4674, |
| "nll_loss": 1.373046875, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": 1.043554663658142, |
| "rewards/margins": 1.4679687023162842, |
| "rewards/rejected": -0.42412108182907104, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.17437452615617893, |
| "grad_norm": 143.39089630193985, |
| "learning_rate": 4.5914069081718614e-07, |
| "logits/chosen": 0.15617676079273224, |
| "logits/rejected": 0.0377197265625, |
| "logps/chosen": -533.5999755859375, |
| "logps/rejected": -573.5999755859375, |
| "loss": 0.4671, |
| "nll_loss": 1.2648437023162842, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": 0.642871081829071, |
| "rewards/margins": 1.3449218273162842, |
| "rewards/rejected": -0.7015625238418579, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.1819560272934041, |
| "grad_norm": 174.53324433123169, |
| "learning_rate": 4.5492839090143215e-07, |
| "logits/chosen": 0.19760742783546448, |
| "logits/rejected": 0.0694580078125, |
| "logps/chosen": -479.0, |
| "logps/rejected": -460.79998779296875, |
| "loss": 0.5019, |
| "nll_loss": 1.2683594226837158, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": 0.4159912168979645, |
| "rewards/margins": 1.125390648841858, |
| "rewards/rejected": -0.709277331829071, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.18953752843062927, |
| "grad_norm": 96.48112981479892, |
| "learning_rate": 4.5071609098567817e-07, |
| "logits/chosen": 0.11848144233226776, |
| "logits/rejected": 0.06085815280675888, |
| "logps/chosen": -738.5999755859375, |
| "logps/rejected": -651.7999877929688, |
| "loss": 0.4883, |
| "nll_loss": 1.408593773841858, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": 0.83154296875, |
| "rewards/margins": 1.3964354991912842, |
| "rewards/rejected": -0.5653320550918579, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.19711902956785443, |
| "grad_norm": 342.616952137923, |
| "learning_rate": 4.4650379106992413e-07, |
| "logits/chosen": 0.1614990234375, |
| "logits/rejected": 0.04248046875, |
| "logps/chosen": -526.4000244140625, |
| "logps/rejected": -575.4000244140625, |
| "loss": 0.5252, |
| "nll_loss": 1.343359351158142, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": 1.203125, |
| "rewards/margins": 1.326440453529358, |
| "rewards/rejected": -0.12354736030101776, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.2047005307050796, |
| "grad_norm": 65.27787243249955, |
| "learning_rate": 4.4229149115417014e-07, |
| "logits/chosen": 0.17680664360523224, |
| "logits/rejected": 0.04179687425494194, |
| "logps/chosen": -528.2000122070312, |
| "logps/rejected": -584.0, |
| "loss": 0.4598, |
| "nll_loss": 1.197656273841858, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": 1.369140625, |
| "rewards/margins": 1.4425780773162842, |
| "rewards/rejected": -0.07305908203125, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.21228203184230476, |
| "grad_norm": 201.96030634728163, |
| "learning_rate": 4.3807919123841616e-07, |
| "logits/chosen": 0.2735351622104645, |
| "logits/rejected": 0.13618774712085724, |
| "logps/chosen": -413.1000061035156, |
| "logps/rejected": -387.8999938964844, |
| "loss": 0.4424, |
| "nll_loss": 1.1570312976837158, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": 1.322656273841858, |
| "rewards/margins": 1.4929687976837158, |
| "rewards/rejected": -0.17158202826976776, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.21986353297952996, |
| "grad_norm": 244.51972827175538, |
| "learning_rate": 4.338668913226621e-07, |
| "logits/chosen": 0.17041015625, |
| "logits/rejected": 0.04808349534869194, |
| "logps/chosen": -576.2000122070312, |
| "logps/rejected": -552.7999877929688, |
| "loss": 0.4339, |
| "nll_loss": 1.224218726158142, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": 1.2804687023162842, |
| "rewards/margins": 1.711328148841858, |
| "rewards/rejected": -0.4312988221645355, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.22744503411675512, |
| "grad_norm": 160.90356259462718, |
| "learning_rate": 4.2965459140690813e-07, |
| "logits/chosen": 0.14516600966453552, |
| "logits/rejected": 0.06256103515625, |
| "logps/chosen": -564.4000244140625, |
| "logps/rejected": -541.4000244140625, |
| "loss": 0.416, |
| "nll_loss": 1.3507812023162842, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": 1.1989257335662842, |
| "rewards/margins": 1.465234398841858, |
| "rewards/rejected": -0.2666015625, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.2350265352539803, |
| "grad_norm": 122.92067457976098, |
| "learning_rate": 4.2544229149115415e-07, |
| "logits/chosen": 0.16321411728858948, |
| "logits/rejected": 0.02617187425494194, |
| "logps/chosen": -500.6000061035156, |
| "logps/rejected": -510.20001220703125, |
| "loss": 0.5279, |
| "nll_loss": 1.127343773841858, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": 1.3679687976837158, |
| "rewards/margins": 1.8019530773162842, |
| "rewards/rejected": -0.43256837129592896, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.24260803639120546, |
| "grad_norm": 111.42348359831298, |
| "learning_rate": 4.212299915754001e-07, |
| "logits/chosen": 0.20351561903953552, |
| "logits/rejected": 0.10941161960363388, |
| "logps/chosen": -626.2000122070312, |
| "logps/rejected": -613.7999877929688, |
| "loss": 0.4295, |
| "nll_loss": 1.22265625, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 0.866992175579071, |
| "rewards/margins": 1.7000000476837158, |
| "rewards/rejected": -0.8323730230331421, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.25018953752843065, |
| "grad_norm": 95.27457125391763, |
| "learning_rate": 4.170176916596461e-07, |
| "logits/chosen": 0.26093751192092896, |
| "logits/rejected": 0.1373291015625, |
| "logps/chosen": -444.0, |
| "logps/rejected": -464.0, |
| "loss": 0.383, |
| "nll_loss": 1.2921874523162842, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": 1.0666992664337158, |
| "rewards/margins": 1.975000023841858, |
| "rewards/rejected": -0.9068603515625, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.2577710386656558, |
| "grad_norm": 138.3345438826051, |
| "learning_rate": 4.128053917438922e-07, |
| "logits/chosen": 0.2812744081020355, |
| "logits/rejected": 0.12603759765625, |
| "logps/chosen": -425.79998779296875, |
| "logps/rejected": -443.79998779296875, |
| "loss": 0.3626, |
| "nll_loss": 1.155859351158142, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": 0.916552722454071, |
| "rewards/margins": 2.116406202316284, |
| "rewards/rejected": -1.199462890625, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.265352539802881, |
| "grad_norm": 248.2578962865936, |
| "learning_rate": 4.0859309182813815e-07, |
| "logits/chosen": 0.19716186821460724, |
| "logits/rejected": -0.0211181640625, |
| "logps/chosen": -702.5999755859375, |
| "logps/rejected": -697.4000244140625, |
| "loss": 0.4761, |
| "nll_loss": 1.3152344226837158, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": 1.0244140625, |
| "rewards/margins": 1.7078125476837158, |
| "rewards/rejected": -0.683398425579071, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.27293404094010615, |
| "grad_norm": 165.199706750475, |
| "learning_rate": 4.0438079191238417e-07, |
| "logits/chosen": 0.20786742866039276, |
| "logits/rejected": 0.07772216945886612, |
| "logps/chosen": -603.0, |
| "logps/rejected": -621.4000244140625, |
| "loss": 0.4578, |
| "nll_loss": 1.168359398841858, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": 1.0081055164337158, |
| "rewards/margins": 1.6279296875, |
| "rewards/rejected": -0.62451171875, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.2805155420773313, |
| "grad_norm": 121.96006947274803, |
| "learning_rate": 4.001684919966302e-07, |
| "logits/chosen": 0.2735351622104645, |
| "logits/rejected": 0.09089355170726776, |
| "logps/chosen": -439.0, |
| "logps/rejected": -460.20001220703125, |
| "loss": 0.3829, |
| "nll_loss": 1.330468773841858, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": 0.996289074420929, |
| "rewards/margins": 1.541015625, |
| "rewards/rejected": -0.5445801019668579, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.2880970432145565, |
| "grad_norm": 182.5442973440352, |
| "learning_rate": 3.9595619208087615e-07, |
| "logits/chosen": 0.1871185302734375, |
| "logits/rejected": 0.02005615271627903, |
| "logps/chosen": -427.0, |
| "logps/rejected": -429.79998779296875, |
| "loss": 0.3873, |
| "nll_loss": 1.224218726158142, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": 1.036474585533142, |
| "rewards/margins": 2.098437547683716, |
| "rewards/rejected": -1.060937523841858, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.29567854435178165, |
| "grad_norm": 265.42006162215375, |
| "learning_rate": 3.9174389216512216e-07, |
| "logits/chosen": 0.12451171875, |
| "logits/rejected": -0.06752929836511612, |
| "logps/chosen": -522.5999755859375, |
| "logps/rejected": -547.0999755859375, |
| "loss": 0.3694, |
| "nll_loss": 1.4851562976837158, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": 0.9535156488418579, |
| "rewards/margins": 2.038378953933716, |
| "rewards/rejected": -1.085546851158142, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.3032600454890068, |
| "grad_norm": 121.52240610984677, |
| "learning_rate": 3.875315922493682e-07, |
| "logits/chosen": 0.21596679091453552, |
| "logits/rejected": 0.08803711086511612, |
| "logps/chosen": -522.2000122070312, |
| "logps/rejected": -518.4000244140625, |
| "loss": 0.3509, |
| "nll_loss": 1.23828125, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": 1.262304663658142, |
| "rewards/margins": 2.3251953125, |
| "rewards/rejected": -1.0622069835662842, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.310841546626232, |
| "grad_norm": 198.63570140647747, |
| "learning_rate": 3.8331929233361414e-07, |
| "logits/chosen": 0.26396483182907104, |
| "logits/rejected": 0.04107666015625, |
| "logps/chosen": -541.0, |
| "logps/rejected": -614.5999755859375, |
| "loss": 0.4102, |
| "nll_loss": 1.206640601158142, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": 0.7144531011581421, |
| "rewards/margins": 1.920312523841858, |
| "rewards/rejected": -1.2052490711212158, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.31842304776345715, |
| "grad_norm": 124.7572195361246, |
| "learning_rate": 3.7910699241786015e-07, |
| "logits/chosen": 0.12045898288488388, |
| "logits/rejected": 0.06239013746380806, |
| "logps/chosen": -647.5999755859375, |
| "logps/rejected": -664.7999877929688, |
| "loss": 0.4872, |
| "nll_loss": 1.5203125476837158, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": 0.3798828125, |
| "rewards/margins": 1.7646484375, |
| "rewards/rejected": -1.384179711341858, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.3260045489006823, |
| "grad_norm": 303.88739862771314, |
| "learning_rate": 3.7489469250210617e-07, |
| "logits/chosen": 0.13037109375, |
| "logits/rejected": -0.02709350548684597, |
| "logps/chosen": -591.0, |
| "logps/rejected": -558.7999877929688, |
| "loss": 0.4929, |
| "nll_loss": 1.4070312976837158, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": 0.7420898675918579, |
| "rewards/margins": 1.9757812023162842, |
| "rewards/rejected": -1.232421875, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.33358605003790753, |
| "grad_norm": 339.23070637711777, |
| "learning_rate": 3.7068239258635213e-07, |
| "logits/chosen": 0.24921874701976776, |
| "logits/rejected": 0.11030273139476776, |
| "logps/chosen": -608.0999755859375, |
| "logps/rejected": -606.5999755859375, |
| "loss": 0.6025, |
| "nll_loss": 1.1183593273162842, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": 0.6957031488418579, |
| "rewards/margins": 1.5625, |
| "rewards/rejected": -0.8663085699081421, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.3411675511751327, |
| "grad_norm": 196.8785396662436, |
| "learning_rate": 3.6647009267059814e-07, |
| "logits/chosen": 0.15043945610523224, |
| "logits/rejected": 0.01263427734375, |
| "logps/chosen": -525.4000244140625, |
| "logps/rejected": -506.79998779296875, |
| "loss": 0.3875, |
| "nll_loss": 1.2882812023162842, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": 0.885937511920929, |
| "rewards/margins": 2.108203172683716, |
| "rewards/rejected": -1.2228515148162842, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.34874905231235787, |
| "grad_norm": 129.86600239128992, |
| "learning_rate": 3.6225779275484416e-07, |
| "logits/chosen": 0.09770508110523224, |
| "logits/rejected": -0.0045104981400072575, |
| "logps/chosen": -568.5999755859375, |
| "logps/rejected": -582.5999755859375, |
| "loss": 0.3584, |
| "nll_loss": 1.3078124523162842, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": 0.759228527545929, |
| "rewards/margins": 2.2353515625, |
| "rewards/rejected": -1.474609375, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.35633055344958303, |
| "grad_norm": 198.9132732417745, |
| "learning_rate": 3.580454928390901e-07, |
| "logits/chosen": 0.2828125059604645, |
| "logits/rejected": 0.06529541313648224, |
| "logps/chosen": -569.5999755859375, |
| "logps/rejected": -673.0, |
| "loss": 1.691, |
| "nll_loss": 1.230078101158142, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": 1.353906273841858, |
| "rewards/margins": 0.9765625, |
| "rewards/rejected": 0.3705078065395355, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.3639120545868082, |
| "grad_norm": 162.93774383329688, |
| "learning_rate": 3.5383319292333613e-07, |
| "logits/chosen": 0.22368164360523224, |
| "logits/rejected": 0.13312987983226776, |
| "logps/chosen": -472.20001220703125, |
| "logps/rejected": -433.20001220703125, |
| "loss": 0.3808, |
| "nll_loss": 1.1730468273162842, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": 1.345703125, |
| "rewards/margins": 1.978906273841858, |
| "rewards/rejected": -0.63232421875, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.37149355572403336, |
| "grad_norm": 314.69450736010833, |
| "learning_rate": 3.4962089300758215e-07, |
| "logits/chosen": 0.19184570014476776, |
| "logits/rejected": 0.01174316368997097, |
| "logps/chosen": -516.5999755859375, |
| "logps/rejected": -530.0, |
| "loss": 0.5622, |
| "nll_loss": 1.2273437976837158, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": 1.878515601158142, |
| "rewards/margins": 1.8523437976837158, |
| "rewards/rejected": 0.02460937574505806, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.37907505686125853, |
| "grad_norm": 127.11791996065408, |
| "learning_rate": 3.454085930918281e-07, |
| "logits/chosen": 0.32811278104782104, |
| "logits/rejected": 0.15139159560203552, |
| "logps/chosen": -514.0, |
| "logps/rejected": -559.5999755859375, |
| "loss": 0.4157, |
| "nll_loss": 1.1749999523162842, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": 1.127539038658142, |
| "rewards/margins": 1.6062500476837158, |
| "rewards/rejected": -0.4796142578125, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.3866565579984837, |
| "grad_norm": 110.73418365791827, |
| "learning_rate": 3.411962931760741e-07, |
| "logits/chosen": 0.16527099907398224, |
| "logits/rejected": -0.002532958984375, |
| "logps/chosen": -615.5999755859375, |
| "logps/rejected": -609.4000244140625, |
| "loss": 0.3923, |
| "nll_loss": 1.234375, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 1.1682708263397217, |
| "rewards/margins": 1.9921875, |
| "rewards/rejected": -0.822509765625, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.39423805913570886, |
| "grad_norm": 113.57088735550161, |
| "learning_rate": 3.3698399326032014e-07, |
| "logits/chosen": 0.13564452528953552, |
| "logits/rejected": -0.02927246131002903, |
| "logps/chosen": -630.9000244140625, |
| "logps/rejected": -627.5999755859375, |
| "loss": 0.3565, |
| "nll_loss": 1.330468773841858, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 0.7025390863418579, |
| "rewards/margins": 2.2406249046325684, |
| "rewards/rejected": -1.537500023841858, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.40181956027293403, |
| "grad_norm": 114.88993123662492, |
| "learning_rate": 3.327716933445661e-07, |
| "logits/chosen": 0.2532714903354645, |
| "logits/rejected": 0.14990234375, |
| "logps/chosen": -532.2000122070312, |
| "logps/rejected": -546.4000244140625, |
| "loss": 0.3645, |
| "nll_loss": 1.03125, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": 0.9659668207168579, |
| "rewards/margins": 2.059375047683716, |
| "rewards/rejected": -1.091894507408142, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.4094010614101592, |
| "grad_norm": 140.5881761703118, |
| "learning_rate": 3.285593934288121e-07, |
| "logits/chosen": 0.24760742485523224, |
| "logits/rejected": 0.0767822265625, |
| "logps/chosen": -570.0, |
| "logps/rejected": -560.9000244140625, |
| "loss": 0.3599, |
| "nll_loss": 1.072656273841858, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": 1.642431616783142, |
| "rewards/margins": 2.5054688453674316, |
| "rewards/rejected": -0.8681640625, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.41698256254738436, |
| "grad_norm": 124.0882733071695, |
| "learning_rate": 3.2434709351305813e-07, |
| "logits/chosen": 0.19780273735523224, |
| "logits/rejected": 0.09107665717601776, |
| "logps/chosen": -652.7999877929688, |
| "logps/rejected": -671.4000244140625, |
| "loss": 0.4856, |
| "nll_loss": 1.3191406726837158, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": 1.3234374523162842, |
| "rewards/margins": 1.916015625, |
| "rewards/rejected": -0.5931396484375, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.4245640636846095, |
| "grad_norm": 72.26271440273402, |
| "learning_rate": 3.201347935973041e-07, |
| "logits/chosen": 0.16713866591453552, |
| "logits/rejected": 0.04147949069738388, |
| "logps/chosen": -532.2000122070312, |
| "logps/rejected": -541.0, |
| "loss": 0.3617, |
| "nll_loss": 1.1613280773162842, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": 1.2734375, |
| "rewards/margins": 2.1117186546325684, |
| "rewards/rejected": -0.837597668170929, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.43214556482183475, |
| "grad_norm": 150.34076527214728, |
| "learning_rate": 3.159224936815501e-07, |
| "logits/chosen": 0.23625488579273224, |
| "logits/rejected": 0.05903320387005806, |
| "logps/chosen": -489.79998779296875, |
| "logps/rejected": -441.5, |
| "loss": 0.4574, |
| "nll_loss": 1.159765601158142, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": 1.4072265625, |
| "rewards/margins": 1.7890625, |
| "rewards/rejected": -0.3818359375, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.4397270659590599, |
| "grad_norm": 153.1693060134136, |
| "learning_rate": 3.117101937657961e-07, |
| "logits/chosen": 0.23941650986671448, |
| "logits/rejected": 0.04136962816119194, |
| "logps/chosen": -540.2000122070312, |
| "logps/rejected": -460.0, |
| "loss": 0.3712, |
| "nll_loss": 1.142187476158142, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 1.1106445789337158, |
| "rewards/margins": 2.3414063453674316, |
| "rewards/rejected": -1.228906273841858, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.4473085670962851, |
| "grad_norm": 194.82248152259902, |
| "learning_rate": 3.074978938500421e-07, |
| "logits/chosen": 0.17299804091453552, |
| "logits/rejected": 0.06650390475988388, |
| "logps/chosen": -717.0, |
| "logps/rejected": -706.2000122070312, |
| "loss": 0.4601, |
| "nll_loss": 1.144921898841858, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": 1.5226562023162842, |
| "rewards/margins": 1.931640625, |
| "rewards/rejected": -0.41069334745407104, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.45489006823351025, |
| "grad_norm": 130.26234365994824, |
| "learning_rate": 3.032855939342881e-07, |
| "logits/chosen": 0.2310791015625, |
| "logits/rejected": 0.10465087741613388, |
| "logps/chosen": -544.4000244140625, |
| "logps/rejected": -530.7999877929688, |
| "loss": 0.3822, |
| "nll_loss": 1.168359398841858, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": 1.578125, |
| "rewards/margins": 2.103515625, |
| "rewards/rejected": -0.527148425579071, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.4624715693707354, |
| "grad_norm": 771.0753113803045, |
| "learning_rate": 2.990732940185341e-07, |
| "logits/chosen": 0.26826173067092896, |
| "logits/rejected": 0.116119384765625, |
| "logps/chosen": -585.2000122070312, |
| "logps/rejected": -631.0, |
| "loss": 0.5163, |
| "nll_loss": 1.3234374523162842, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": 1.073632836341858, |
| "rewards/margins": 1.968359351158142, |
| "rewards/rejected": -0.895214855670929, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.4700530705079606, |
| "grad_norm": 146.97598084251982, |
| "learning_rate": 2.948609941027801e-07, |
| "logits/chosen": 0.03544921800494194, |
| "logits/rejected": -0.01901855506002903, |
| "logps/chosen": -692.5999755859375, |
| "logps/rejected": -693.5999755859375, |
| "loss": 0.3872, |
| "nll_loss": 1.3894531726837158, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": 1.142578125, |
| "rewards/margins": 2.372265577316284, |
| "rewards/rejected": -1.22998046875, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.47763457164518575, |
| "grad_norm": 231.26312952989144, |
| "learning_rate": 2.906486941870261e-07, |
| "logits/chosen": 0.11240234225988388, |
| "logits/rejected": 0.06850586086511612, |
| "logps/chosen": -679.5999755859375, |
| "logps/rejected": -561.5999755859375, |
| "loss": 0.4133, |
| "nll_loss": 1.369531273841858, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": 1.1198608875274658, |
| "rewards/margins": 2.32421875, |
| "rewards/rejected": -1.2004883289337158, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.4852160727824109, |
| "grad_norm": 228.98492770996447, |
| "learning_rate": 2.864363942712721e-07, |
| "logits/chosen": 0.12601622939109802, |
| "logits/rejected": 0.012011718936264515, |
| "logps/chosen": -680.2000122070312, |
| "logps/rejected": -615.4000244140625, |
| "loss": 0.5076, |
| "nll_loss": 1.203515648841858, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": 1.0344727039337158, |
| "rewards/margins": 1.968359351158142, |
| "rewards/rejected": -0.933398425579071, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.4927975739196361, |
| "grad_norm": 101.98305639734917, |
| "learning_rate": 2.8222409435551807e-07, |
| "logits/chosen": 0.24750976264476776, |
| "logits/rejected": 0.10369262844324112, |
| "logps/chosen": -527.2000122070312, |
| "logps/rejected": -538.0, |
| "loss": 0.5104, |
| "nll_loss": 1.207421898841858, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": 0.8055664300918579, |
| "rewards/margins": 1.642187476158142, |
| "rewards/rejected": -0.8370116949081421, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.5003790750568613, |
| "grad_norm": 86.66361253377694, |
| "learning_rate": 2.780117944397641e-07, |
| "logits/chosen": 0.23603515326976776, |
| "logits/rejected": 0.10183105617761612, |
| "logps/chosen": -427.79998779296875, |
| "logps/rejected": -430.79998779296875, |
| "loss": 0.3578, |
| "nll_loss": 1.2980468273162842, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": 1.646484375, |
| "rewards/margins": 2.233593702316284, |
| "rewards/rejected": -0.5870116949081421, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.5079605761940864, |
| "grad_norm": 154.46017727412266, |
| "learning_rate": 2.737994945240101e-07, |
| "logits/chosen": 0.24692383408546448, |
| "logits/rejected": 0.05348510667681694, |
| "logps/chosen": -557.7999877929688, |
| "logps/rejected": -567.0, |
| "loss": 0.3512, |
| "nll_loss": 1.154687523841858, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 2.2105469703674316, |
| "rewards/margins": 2.391406297683716, |
| "rewards/rejected": -0.18050536513328552, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.5155420773313116, |
| "grad_norm": 269.5164271871116, |
| "learning_rate": 2.6958719460825606e-07, |
| "logits/chosen": 0.18512573838233948, |
| "logits/rejected": 0.02484130859375, |
| "logps/chosen": -563.5999755859375, |
| "logps/rejected": -601.2000122070312, |
| "loss": 0.4837, |
| "nll_loss": 1.1749999523162842, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 1.6671874523162842, |
| "rewards/margins": 1.5674316883087158, |
| "rewards/rejected": 0.09707031399011612, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.5231235784685367, |
| "grad_norm": 260.79714861404403, |
| "learning_rate": 2.653748946925021e-07, |
| "logits/chosen": 0.17792968451976776, |
| "logits/rejected": 0.00091552734375, |
| "logps/chosen": -538.2000122070312, |
| "logps/rejected": -570.2000122070312, |
| "loss": 0.4087, |
| "nll_loss": 1.2804687023162842, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": 1.5203125476837158, |
| "rewards/margins": 2.2164063453674316, |
| "rewards/rejected": -0.69793701171875, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.530705079605762, |
| "grad_norm": 77.58328940429827, |
| "learning_rate": 2.611625947767481e-07, |
| "logits/chosen": 0.19677734375, |
| "logits/rejected": 0.01052246056497097, |
| "logps/chosen": -472.20001220703125, |
| "logps/rejected": -511.79998779296875, |
| "loss": 0.4354, |
| "nll_loss": 1.3624999523162842, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": 1.189550757408142, |
| "rewards/margins": 2.3046875, |
| "rewards/rejected": -1.11328125, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.5382865807429871, |
| "grad_norm": 189.1893865483772, |
| "learning_rate": 2.5695029486099405e-07, |
| "logits/chosen": 0.16862793266773224, |
| "logits/rejected": 0.10245361179113388, |
| "logps/chosen": -507.0, |
| "logps/rejected": -519.7999877929688, |
| "loss": 0.5728, |
| "nll_loss": 1.277734398841858, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": 0.6670166254043579, |
| "rewards/margins": 1.80126953125, |
| "rewards/rejected": -1.134374976158142, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.5458680818802123, |
| "grad_norm": 68.61904191897719, |
| "learning_rate": 2.5273799494524007e-07, |
| "logits/chosen": 0.25273436307907104, |
| "logits/rejected": 0.10590820014476776, |
| "logps/chosen": -478.29998779296875, |
| "logps/rejected": -528.2000122070312, |
| "loss": 0.4332, |
| "nll_loss": 1.21875, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": 0.42304688692092896, |
| "rewards/margins": 2.076367139816284, |
| "rewards/rejected": -1.6535155773162842, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.5534495830174374, |
| "grad_norm": 157.38793937778374, |
| "learning_rate": 2.485256950294861e-07, |
| "logits/chosen": 0.19511719048023224, |
| "logits/rejected": 0.06821288913488388, |
| "logps/chosen": -391.79998779296875, |
| "logps/rejected": -453.0, |
| "loss": 0.4128, |
| "nll_loss": 1.2273437976837158, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": 1.0950195789337158, |
| "rewards/margins": 1.953125, |
| "rewards/rejected": -0.858593761920929, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.5610310841546626, |
| "grad_norm": 83.09266555988135, |
| "learning_rate": 2.443133951137321e-07, |
| "logits/chosen": 0.15876464545726776, |
| "logits/rejected": -0.01612548902630806, |
| "logps/chosen": -470.20001220703125, |
| "logps/rejected": -466.79998779296875, |
| "loss": 0.2883, |
| "nll_loss": 1.114843726158142, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": 1.08984375, |
| "rewards/margins": 2.4671874046325684, |
| "rewards/rejected": -1.376367211341858, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.5686125852918877, |
| "grad_norm": 136.52742231810836, |
| "learning_rate": 2.4010109519797806e-07, |
| "logits/chosen": 0.24506835639476776, |
| "logits/rejected": -0.0029052733443677425, |
| "logps/chosen": -485.79998779296875, |
| "logps/rejected": -464.3999938964844, |
| "loss": 0.3478, |
| "nll_loss": 1.302343726158142, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 1.229882836341858, |
| "rewards/margins": 2.585156202316284, |
| "rewards/rejected": -1.3562500476837158, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.576194086429113, |
| "grad_norm": 100.82715249225713, |
| "learning_rate": 2.3588879528222407e-07, |
| "logits/chosen": 0.21192626655101776, |
| "logits/rejected": 0.06955566257238388, |
| "logps/chosen": -527.0, |
| "logps/rejected": -494.3999938964844, |
| "loss": 0.3816, |
| "nll_loss": 1.222265601158142, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": 1.437890648841858, |
| "rewards/margins": 2.26953125, |
| "rewards/rejected": -0.832714855670929, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.5837755875663382, |
| "grad_norm": 147.57950000370568, |
| "learning_rate": 2.316764953664701e-07, |
| "logits/chosen": 0.08811035007238388, |
| "logits/rejected": -0.0233154296875, |
| "logps/chosen": -573.5999755859375, |
| "logps/rejected": -605.4000244140625, |
| "loss": 0.4853, |
| "nll_loss": 1.2917969226837158, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 1.7355468273162842, |
| "rewards/margins": 2.079296827316284, |
| "rewards/rejected": -0.3466796875, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.5913570887035633, |
| "grad_norm": 31.889468406083985, |
| "learning_rate": 2.2746419545071608e-07, |
| "logits/chosen": 0.11967773735523224, |
| "logits/rejected": 0.01263427734375, |
| "logps/chosen": -496.6000061035156, |
| "logps/rejected": -455.6000061035156, |
| "loss": 0.3848, |
| "nll_loss": 1.337890625, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": 1.70703125, |
| "rewards/margins": 2.2591795921325684, |
| "rewards/rejected": -0.55078125, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.5989385898407885, |
| "grad_norm": 136.6171237288431, |
| "learning_rate": 2.2325189553496206e-07, |
| "logits/chosen": 0.23629149794578552, |
| "logits/rejected": 0.123291015625, |
| "logps/chosen": -495.3999938964844, |
| "logps/rejected": -505.0, |
| "loss": 0.3264, |
| "nll_loss": 1.367578148841858, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": 1.6804687976837158, |
| "rewards/margins": 2.444531202316284, |
| "rewards/rejected": -0.7638183832168579, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.6065200909780136, |
| "grad_norm": 169.16069886552995, |
| "learning_rate": 2.1903959561920808e-07, |
| "logits/chosen": 0.18515625596046448, |
| "logits/rejected": 0.00864257849752903, |
| "logps/chosen": -416.20001220703125, |
| "logps/rejected": -472.20001220703125, |
| "loss": 0.3884, |
| "nll_loss": 1.1886718273162842, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 1.984765648841858, |
| "rewards/margins": 2.346874952316284, |
| "rewards/rejected": -0.36259764432907104, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.6141015921152388, |
| "grad_norm": 300.451641237815, |
| "learning_rate": 2.1482729570345407e-07, |
| "logits/chosen": 0.22788086533546448, |
| "logits/rejected": 0.08359374850988388, |
| "logps/chosen": -546.7999877929688, |
| "logps/rejected": -531.5999755859375, |
| "loss": 0.4231, |
| "nll_loss": 1.183984398841858, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 1.3076171875, |
| "rewards/margins": 2.2835936546325684, |
| "rewards/rejected": -0.977038562297821, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.621683093252464, |
| "grad_norm": 183.86035293603638, |
| "learning_rate": 2.1061499578770005e-07, |
| "logits/chosen": 0.21928711235523224, |
| "logits/rejected": 0.07795409858226776, |
| "logps/chosen": -462.79998779296875, |
| "logps/rejected": -428.6000061035156, |
| "loss": 0.3567, |
| "nll_loss": 1.178125023841858, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": 1.20458984375, |
| "rewards/margins": 2.129687547683716, |
| "rewards/rejected": -0.925891101360321, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.6292645943896892, |
| "grad_norm": 120.21446058772034, |
| "learning_rate": 2.064026958719461e-07, |
| "logits/chosen": 0.15007324516773224, |
| "logits/rejected": -0.02866210974752903, |
| "logps/chosen": -534.9000244140625, |
| "logps/rejected": -520.5999755859375, |
| "loss": 0.366, |
| "nll_loss": 1.256250023841858, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": 1.5652344226837158, |
| "rewards/margins": 2.6624999046325684, |
| "rewards/rejected": -1.100000023841858, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.6368460955269143, |
| "grad_norm": 153.8126911904177, |
| "learning_rate": 2.0219039595619208e-07, |
| "logits/chosen": 0.13383789360523224, |
| "logits/rejected": 0.04542846605181694, |
| "logps/chosen": -485.20001220703125, |
| "logps/rejected": -536.7999877929688, |
| "loss": 0.4412, |
| "nll_loss": 1.170312523841858, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": 1.072851538658142, |
| "rewards/margins": 2.020214796066284, |
| "rewards/rejected": -0.9491211175918579, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.6444275966641395, |
| "grad_norm": 121.8986484261781, |
| "learning_rate": 1.9797809604043807e-07, |
| "logits/chosen": 0.16423949599266052, |
| "logits/rejected": -0.01621093787252903, |
| "logps/chosen": -535.5999755859375, |
| "logps/rejected": -534.2000122070312, |
| "loss": 0.4136, |
| "nll_loss": 1.3347656726837158, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": 1.1912109851837158, |
| "rewards/margins": 2.6781249046325684, |
| "rewards/rejected": -1.484765648841858, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.6520090978013646, |
| "grad_norm": 107.32753219029611, |
| "learning_rate": 1.937657961246841e-07, |
| "logits/chosen": 0.24458007514476776, |
| "logits/rejected": -0.0005981445428915322, |
| "logps/chosen": -509.0, |
| "logps/rejected": -537.9000244140625, |
| "loss": 0.4031, |
| "nll_loss": 1.085546851158142, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": 1.205078125, |
| "rewards/margins": 2.5023436546325684, |
| "rewards/rejected": -1.293359398841858, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.6595905989385898, |
| "grad_norm": 199.56179373972668, |
| "learning_rate": 1.8955349620893008e-07, |
| "logits/chosen": 0.16865234076976776, |
| "logits/rejected": 0.01300659216940403, |
| "logps/chosen": -501.3999938964844, |
| "logps/rejected": -553.7999877929688, |
| "loss": 0.5141, |
| "nll_loss": 1.271875023841858, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": 1.2307617664337158, |
| "rewards/margins": 2.220898389816284, |
| "rewards/rejected": -0.989794909954071, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.6671721000758151, |
| "grad_norm": 145.67471682071726, |
| "learning_rate": 1.8534119629317606e-07, |
| "logits/chosen": 0.14238281548023224, |
| "logits/rejected": 0.04350585862994194, |
| "logps/chosen": -609.0, |
| "logps/rejected": -608.7999877929688, |
| "loss": 0.4773, |
| "nll_loss": 1.243749976158142, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": 1.129370093345642, |
| "rewards/margins": 1.8621094226837158, |
| "rewards/rejected": -0.7308593988418579, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.6747536012130402, |
| "grad_norm": 196.73193775310398, |
| "learning_rate": 1.8112889637742208e-07, |
| "logits/chosen": 0.04483642429113388, |
| "logits/rejected": 0.0048583983443677425, |
| "logps/chosen": -693.7999877929688, |
| "logps/rejected": -666.5999755859375, |
| "loss": 0.6123, |
| "nll_loss": 1.2527344226837158, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 1.357812523841858, |
| "rewards/margins": 1.69140625, |
| "rewards/rejected": -0.33369141817092896, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.6823351023502654, |
| "grad_norm": 194.77536214548843, |
| "learning_rate": 1.7691659646166807e-07, |
| "logits/chosen": 0.14555664360523224, |
| "logits/rejected": 0.04428710788488388, |
| "logps/chosen": -546.4000244140625, |
| "logps/rejected": -503.20001220703125, |
| "loss": 0.4322, |
| "nll_loss": 1.318750023841858, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": 1.5364258289337158, |
| "rewards/margins": 2.186328172683716, |
| "rewards/rejected": -0.6504882574081421, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.6899166034874905, |
| "grad_norm": 175.15629650260274, |
| "learning_rate": 1.7270429654591406e-07, |
| "logits/chosen": 0.08806152641773224, |
| "logits/rejected": 0.008822632022202015, |
| "logps/chosen": -550.2000122070312, |
| "logps/rejected": -557.2000122070312, |
| "loss": 0.4278, |
| "nll_loss": 1.269921898841858, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": 1.422265648841858, |
| "rewards/margins": 2.3148436546325684, |
| "rewards/rejected": -0.8949218988418579, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.6974981046247157, |
| "grad_norm": 158.19405566519555, |
| "learning_rate": 1.6849199663016007e-07, |
| "logits/chosen": 0.11597900092601776, |
| "logits/rejected": -0.031158447265625, |
| "logps/chosen": -474.20001220703125, |
| "logps/rejected": -498.3999938964844, |
| "loss": 0.3356, |
| "nll_loss": 1.2820312976837158, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": 1.6453125476837158, |
| "rewards/margins": 2.567187547683716, |
| "rewards/rejected": -0.9203125238418579, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.7050796057619408, |
| "grad_norm": 79.99219662946521, |
| "learning_rate": 1.6427969671440606e-07, |
| "logits/chosen": 0.1439208984375, |
| "logits/rejected": -0.03364257887005806, |
| "logps/chosen": -457.8999938964844, |
| "logps/rejected": -511.6000061035156, |
| "loss": 0.405, |
| "nll_loss": 1.286718726158142, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": 1.412841796875, |
| "rewards/margins": 2.419140577316284, |
| "rewards/rejected": -1.007421851158142, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.7126611068991661, |
| "grad_norm": 160.68166830448138, |
| "learning_rate": 1.6006739679865205e-07, |
| "logits/chosen": 0.12910155951976776, |
| "logits/rejected": 0.02253418043255806, |
| "logps/chosen": -550.5999755859375, |
| "logps/rejected": -539.4000244140625, |
| "loss": 0.467, |
| "nll_loss": 1.023046851158142, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": 1.1843750476837158, |
| "rewards/margins": 1.8195312023162842, |
| "rewards/rejected": -0.633984386920929, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.7202426080363912, |
| "grad_norm": 48.624487580082295, |
| "learning_rate": 1.5585509688289806e-07, |
| "logits/chosen": 0.11916504055261612, |
| "logits/rejected": 0.012451171875, |
| "logps/chosen": -501.6000061035156, |
| "logps/rejected": -440.1000061035156, |
| "loss": 0.42, |
| "nll_loss": 1.279296875, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": 1.554101586341858, |
| "rewards/margins": 2.168750047683716, |
| "rewards/rejected": -0.6158202886581421, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.7278241091736164, |
| "grad_norm": 108.99945705457651, |
| "learning_rate": 1.5164279696714405e-07, |
| "logits/chosen": 0.14907225966453552, |
| "logits/rejected": 0.005786132998764515, |
| "logps/chosen": -491.79998779296875, |
| "logps/rejected": -485.20001220703125, |
| "loss": 0.4132, |
| "nll_loss": 1.2664062976837158, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": 1.8162109851837158, |
| "rewards/margins": 2.458300828933716, |
| "rewards/rejected": -0.6415039300918579, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.7354056103108415, |
| "grad_norm": 67.28611974505353, |
| "learning_rate": 1.4743049705139004e-07, |
| "logits/chosen": 0.10041503608226776, |
| "logits/rejected": -0.009716796688735485, |
| "logps/chosen": -593.7999877929688, |
| "logps/rejected": -552.0, |
| "loss": 0.4633, |
| "nll_loss": 1.2531249523162842, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 1.6394531726837158, |
| "rewards/margins": 2.0875000953674316, |
| "rewards/rejected": -0.44902342557907104, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.7429871114480667, |
| "grad_norm": 192.60992136850282, |
| "learning_rate": 1.4321819713563605e-07, |
| "logits/chosen": 0.11516723781824112, |
| "logits/rejected": 0.006024169735610485, |
| "logps/chosen": -485.1000061035156, |
| "logps/rejected": -503.6000061035156, |
| "loss": 0.4476, |
| "nll_loss": 1.243749976158142, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": 1.648046851158142, |
| "rewards/margins": 2.1099610328674316, |
| "rewards/rejected": -0.4625244140625, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.7505686125852918, |
| "grad_norm": 94.26112472676634, |
| "learning_rate": 1.3900589721988204e-07, |
| "logits/chosen": 0.06564941257238388, |
| "logits/rejected": -0.05050048977136612, |
| "logps/chosen": -572.0, |
| "logps/rejected": -521.4000244140625, |
| "loss": 0.3979, |
| "nll_loss": 1.314062476158142, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": 2.239062547683716, |
| "rewards/margins": 2.651171922683716, |
| "rewards/rejected": -0.4076171815395355, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.7581501137225171, |
| "grad_norm": 116.49945744118433, |
| "learning_rate": 1.3479359730412803e-07, |
| "logits/chosen": 0.20512695610523224, |
| "logits/rejected": 0.11342773586511612, |
| "logps/chosen": -461.0, |
| "logps/rejected": -458.6000061035156, |
| "loss": 0.3895, |
| "nll_loss": 1.366796851158142, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": 1.659765601158142, |
| "rewards/margins": 2.239062547683716, |
| "rewards/rejected": -0.5755370855331421, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.7657316148597423, |
| "grad_norm": 210.31730274263796, |
| "learning_rate": 1.3058129738837404e-07, |
| "logits/chosen": 0.20961913466453552, |
| "logits/rejected": 0.07167968899011612, |
| "logps/chosen": -475.79998779296875, |
| "logps/rejected": -519.9000244140625, |
| "loss": 0.3534, |
| "nll_loss": 1.1046874523162842, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": 1.8623046875, |
| "rewards/margins": 2.400390625, |
| "rewards/rejected": -0.5356200933456421, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.7733131159969674, |
| "grad_norm": 103.41619948561777, |
| "learning_rate": 1.2636899747262003e-07, |
| "logits/chosen": 0.12003479152917862, |
| "logits/rejected": 0.03270263597369194, |
| "logps/chosen": -683.5999755859375, |
| "logps/rejected": -555.4000244140625, |
| "loss": 0.4201, |
| "nll_loss": 1.4210937023162842, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": 1.8644530773162842, |
| "rewards/margins": 2.190234422683716, |
| "rewards/rejected": -0.32763671875, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.7808946171341926, |
| "grad_norm": 133.79887910214092, |
| "learning_rate": 1.2215669755686605e-07, |
| "logits/chosen": 0.09956054389476776, |
| "logits/rejected": 0.02629699744284153, |
| "logps/chosen": -606.0, |
| "logps/rejected": -603.0, |
| "loss": 0.4273, |
| "nll_loss": 1.2488281726837158, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 1.7316405773162842, |
| "rewards/margins": 2.490234375, |
| "rewards/rejected": -0.759570300579071, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.7884761182714177, |
| "grad_norm": 186.46816938496252, |
| "learning_rate": 1.1794439764111204e-07, |
| "logits/chosen": 0.12141112983226776, |
| "logits/rejected": 0.01883544959127903, |
| "logps/chosen": -555.5999755859375, |
| "logps/rejected": -588.0, |
| "loss": 0.3112, |
| "nll_loss": 1.269921898841858, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": 1.5886719226837158, |
| "rewards/margins": 2.426562547683716, |
| "rewards/rejected": -0.837207019329071, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.796057619408643, |
| "grad_norm": 203.98306483470228, |
| "learning_rate": 1.1373209772535804e-07, |
| "logits/chosen": 0.21123047173023224, |
| "logits/rejected": 0.06298217922449112, |
| "logps/chosen": -525.4000244140625, |
| "logps/rejected": -522.0, |
| "loss": 0.4055, |
| "nll_loss": 1.1183593273162842, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 1.994531273841858, |
| "rewards/margins": 2.360546827316284, |
| "rewards/rejected": -0.36601561307907104, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.8036391205458681, |
| "grad_norm": 187.18328566144393, |
| "learning_rate": 1.0951979780960404e-07, |
| "logits/chosen": 0.10944823920726776, |
| "logits/rejected": 0.02037963829934597, |
| "logps/chosen": -590.4000244140625, |
| "logps/rejected": -583.0, |
| "loss": 0.4506, |
| "nll_loss": 1.330468773841858, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": 1.444921851158142, |
| "rewards/margins": 2.3101563453674316, |
| "rewards/rejected": -0.86767578125, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.8112206216830933, |
| "grad_norm": 98.73681027058517, |
| "learning_rate": 1.0530749789385003e-07, |
| "logits/chosen": 0.10260619968175888, |
| "logits/rejected": -0.04973144456744194, |
| "logps/chosen": -541.5999755859375, |
| "logps/rejected": -571.5999755859375, |
| "loss": 0.3593, |
| "nll_loss": 1.21484375, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": 1.3909180164337158, |
| "rewards/margins": 2.383593797683716, |
| "rewards/rejected": -0.995898425579071, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.8188021228203184, |
| "grad_norm": 33.39836753544675, |
| "learning_rate": 1.0109519797809604e-07, |
| "logits/chosen": 0.18569335341453552, |
| "logits/rejected": -0.02208251878619194, |
| "logps/chosen": -469.20001220703125, |
| "logps/rejected": -457.20001220703125, |
| "loss": 0.372, |
| "nll_loss": 1.341796875, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": 1.908593773841858, |
| "rewards/margins": 2.8589844703674316, |
| "rewards/rejected": -0.948046863079071, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.8263836239575436, |
| "grad_norm": 125.04399201580806, |
| "learning_rate": 9.688289806234204e-08, |
| "logits/chosen": 0.15627440810203552, |
| "logits/rejected": 0.0027099610306322575, |
| "logps/chosen": -597.0, |
| "logps/rejected": -624.0, |
| "loss": 0.341, |
| "nll_loss": 1.235937476158142, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": 1.831640601158142, |
| "rewards/margins": 2.4085936546325684, |
| "rewards/rejected": -0.5736328363418579, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.8339651250947687, |
| "grad_norm": 131.49101558985592, |
| "learning_rate": 9.267059814658803e-08, |
| "logits/chosen": 0.2294921875, |
| "logits/rejected": 0.06977538764476776, |
| "logps/chosen": -434.79998779296875, |
| "logps/rejected": -435.0, |
| "loss": 0.3921, |
| "nll_loss": 1.134765625, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": 1.490234375, |
| "rewards/margins": 2.6937499046325684, |
| "rewards/rejected": -1.2062499523162842, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.8415466262319939, |
| "grad_norm": 119.02191047253005, |
| "learning_rate": 8.845829823083403e-08, |
| "logits/chosen": 0.293212890625, |
| "logits/rejected": 0.04813842847943306, |
| "logps/chosen": -467.20001220703125, |
| "logps/rejected": -505.6000061035156, |
| "loss": 0.3092, |
| "nll_loss": 1.173828125, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": 1.607812523841858, |
| "rewards/margins": 2.729687452316284, |
| "rewards/rejected": -1.121038794517517, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.849128127369219, |
| "grad_norm": 149.05016302273359, |
| "learning_rate": 8.424599831508004e-08, |
| "logits/chosen": 0.004260254092514515, |
| "logits/rejected": 0.03310547024011612, |
| "logps/chosen": -566.2000122070312, |
| "logps/rejected": -556.4000244140625, |
| "loss": 0.4365, |
| "nll_loss": 1.365234375, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": 1.508398413658142, |
| "rewards/margins": 2.2476563453674316, |
| "rewards/rejected": -0.7374023199081421, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.8567096285064443, |
| "grad_norm": 55.23109783038184, |
| "learning_rate": 8.003369839932602e-08, |
| "logits/chosen": 0.10483398288488388, |
| "logits/rejected": -0.04458007961511612, |
| "logps/chosen": -669.7000122070312, |
| "logps/rejected": -617.2000122070312, |
| "loss": 0.3465, |
| "nll_loss": 1.2605469226837158, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": 1.783593773841858, |
| "rewards/margins": 2.4007811546325684, |
| "rewards/rejected": -0.6149657964706421, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.8642911296436695, |
| "grad_norm": 119.87227017716317, |
| "learning_rate": 7.582139848357203e-08, |
| "logits/chosen": 0.21893921494483948, |
| "logits/rejected": 0.0064941407181322575, |
| "logps/chosen": -496.20001220703125, |
| "logps/rejected": -506.0, |
| "loss": 0.4416, |
| "nll_loss": 1.226953148841858, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": 1.5451171398162842, |
| "rewards/margins": 2.251171827316284, |
| "rewards/rejected": -0.706738293170929, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.8718726307808946, |
| "grad_norm": 125.7872093052735, |
| "learning_rate": 7.160909856781803e-08, |
| "logits/chosen": 0.14731445908546448, |
| "logits/rejected": 0.014862060546875, |
| "logps/chosen": -562.0, |
| "logps/rejected": -548.4000244140625, |
| "loss": 0.4338, |
| "nll_loss": 1.314062476158142, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": 1.6384766101837158, |
| "rewards/margins": 2.094531297683716, |
| "rewards/rejected": -0.4574218690395355, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.8794541319181198, |
| "grad_norm": 155.24491691884234, |
| "learning_rate": 6.739679865206401e-08, |
| "logits/chosen": 0.22019043564796448, |
| "logits/rejected": 0.11717529594898224, |
| "logps/chosen": -518.7000122070312, |
| "logps/rejected": -484.3999938964844, |
| "loss": 0.3854, |
| "nll_loss": 1.26171875, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": 1.605078101158142, |
| "rewards/margins": 2.059375047683716, |
| "rewards/rejected": -0.45521241426467896, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.8870356330553449, |
| "grad_norm": 72.26607344826763, |
| "learning_rate": 6.318449873631002e-08, |
| "logits/chosen": 0.16090087592601776, |
| "logits/rejected": -0.0035644532181322575, |
| "logps/chosen": -717.5, |
| "logps/rejected": -732.7999877929688, |
| "loss": 0.4008, |
| "nll_loss": 1.166406273841858, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": 2.0093750953674316, |
| "rewards/margins": 2.692187547683716, |
| "rewards/rejected": -0.6851562261581421, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.8946171341925702, |
| "grad_norm": 108.94764921361714, |
| "learning_rate": 5.897219882055602e-08, |
| "logits/chosen": 0.16987304389476776, |
| "logits/rejected": -0.04276733472943306, |
| "logps/chosen": -556.5999755859375, |
| "logps/rejected": -527.5999755859375, |
| "loss": 0.3503, |
| "nll_loss": 1.2492187023162842, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": 1.68017578125, |
| "rewards/margins": 2.442578077316284, |
| "rewards/rejected": -0.7623046636581421, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.9021986353297953, |
| "grad_norm": 131.0459749369346, |
| "learning_rate": 5.475989890480202e-08, |
| "logits/chosen": 0.21416015923023224, |
| "logits/rejected": -0.0015411376953125, |
| "logps/chosen": -501.3999938964844, |
| "logps/rejected": -516.2000122070312, |
| "loss": 0.3754, |
| "nll_loss": 1.2999999523162842, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": 1.9015624523162842, |
| "rewards/margins": 2.109179735183716, |
| "rewards/rejected": -0.20556640625, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.9097801364670205, |
| "grad_norm": 307.9800162305656, |
| "learning_rate": 5.054759898904802e-08, |
| "logits/chosen": 0.10655517876148224, |
| "logits/rejected": 0.009765625, |
| "logps/chosen": -486.6000061035156, |
| "logps/rejected": -504.0, |
| "loss": 0.465, |
| "nll_loss": 1.3386719226837158, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": 1.873437523841858, |
| "rewards/margins": 2.637500047683716, |
| "rewards/rejected": -0.762402355670929, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.9173616376042456, |
| "grad_norm": 133.27100734071354, |
| "learning_rate": 4.6335299073294016e-08, |
| "logits/chosen": 0.20000000298023224, |
| "logits/rejected": 0.10622558742761612, |
| "logps/chosen": -451.29998779296875, |
| "logps/rejected": -492.6000061035156, |
| "loss": 0.4231, |
| "nll_loss": 1.357421875, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": 1.500390648841858, |
| "rewards/margins": 2.056640625, |
| "rewards/rejected": -0.5611327886581421, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.9249431387414708, |
| "grad_norm": 85.24520542811791, |
| "learning_rate": 4.212299915754002e-08, |
| "logits/chosen": 0.21176758408546448, |
| "logits/rejected": 0.04278564453125, |
| "logps/chosen": -466.3999938964844, |
| "logps/rejected": -525.0, |
| "loss": 0.3879, |
| "nll_loss": 1.3093750476837158, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 1.709375023841858, |
| "rewards/margins": 2.3140625953674316, |
| "rewards/rejected": -0.607861340045929, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.9325246398786959, |
| "grad_norm": 112.79958598714522, |
| "learning_rate": 3.791069924178601e-08, |
| "logits/chosen": 0.12949219346046448, |
| "logits/rejected": 0.00974121131002903, |
| "logps/chosen": -507.0, |
| "logps/rejected": -529.2000122070312, |
| "loss": 0.399, |
| "nll_loss": 1.314453125, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 1.6931641101837158, |
| "rewards/margins": 2.850781202316284, |
| "rewards/rejected": -1.158203125, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.9401061410159212, |
| "grad_norm": 176.59295105465074, |
| "learning_rate": 3.369839932603201e-08, |
| "logits/chosen": 0.11582031100988388, |
| "logits/rejected": -0.012280273251235485, |
| "logps/chosen": -528.2000122070312, |
| "logps/rejected": -531.7999877929688, |
| "loss": 0.3927, |
| "nll_loss": 1.235937476158142, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": 1.801855444908142, |
| "rewards/margins": 2.326171875, |
| "rewards/rejected": -0.5223633050918579, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.9476876421531463, |
| "grad_norm": 146.1635844887063, |
| "learning_rate": 2.948609941027801e-08, |
| "logits/chosen": 0.23291015625, |
| "logits/rejected": 0.02499694749712944, |
| "logps/chosen": -417.79998779296875, |
| "logps/rejected": -488.20001220703125, |
| "loss": 0.3236, |
| "nll_loss": 1.2312500476837158, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": 1.4695312976837158, |
| "rewards/margins": 2.3531250953674316, |
| "rewards/rejected": -0.8837890625, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.9552691432903715, |
| "grad_norm": 108.13435305388558, |
| "learning_rate": 2.527379949452401e-08, |
| "logits/chosen": 0.13677978515625, |
| "logits/rejected": 0.04533691331744194, |
| "logps/chosen": -525.0, |
| "logps/rejected": -642.0, |
| "loss": 0.3805, |
| "nll_loss": 1.3800780773162842, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": 1.7277343273162842, |
| "rewards/margins": 2.5589842796325684, |
| "rewards/rejected": -0.8324218988418579, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.9628506444275967, |
| "grad_norm": 268.6347905626109, |
| "learning_rate": 2.106149957877001e-08, |
| "logits/chosen": 0.18927001953125, |
| "logits/rejected": 0.06524658203125, |
| "logps/chosen": -494.20001220703125, |
| "logps/rejected": -469.0, |
| "loss": 0.3914, |
| "nll_loss": 1.1515624523162842, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": 1.841406226158142, |
| "rewards/margins": 2.49609375, |
| "rewards/rejected": -0.6563476324081421, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.9704321455648218, |
| "grad_norm": 88.73642966202097, |
| "learning_rate": 1.6849199663016004e-08, |
| "logits/chosen": 0.2381591796875, |
| "logits/rejected": 0.07663574069738388, |
| "logps/chosen": -524.2000122070312, |
| "logps/rejected": -565.7999877929688, |
| "loss": 0.439, |
| "nll_loss": 1.342187523841858, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": 1.600976586341858, |
| "rewards/margins": 2.6089844703674316, |
| "rewards/rejected": -1.008203148841858, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.978013646702047, |
| "grad_norm": 110.05713639865995, |
| "learning_rate": 1.2636899747262005e-08, |
| "logits/chosen": 0.16679687798023224, |
| "logits/rejected": 0.04754638671875, |
| "logps/chosen": -456.3999938964844, |
| "logps/rejected": -462.6000061035156, |
| "loss": 0.4392, |
| "nll_loss": 1.216406226158142, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": 1.694726586341858, |
| "rewards/margins": 2.328125, |
| "rewards/rejected": -0.631542980670929, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.9855951478392722, |
| "grad_norm": 166.5083302014064, |
| "learning_rate": 8.424599831508002e-09, |
| "logits/chosen": 0.1453857421875, |
| "logits/rejected": 0.04035644605755806, |
| "logps/chosen": -548.2000122070312, |
| "logps/rejected": -470.6000061035156, |
| "loss": 0.363, |
| "nll_loss": 1.1730468273162842, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": 1.3935546875, |
| "rewards/margins": 2.388671875, |
| "rewards/rejected": -0.994140625, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.9931766489764974, |
| "grad_norm": 118.78830054230653, |
| "learning_rate": 4.212299915754001e-09, |
| "logits/chosen": 0.14559325575828552, |
| "logits/rejected": 0.04741210862994194, |
| "logps/chosen": -647.0, |
| "logps/rejected": -675.4000244140625, |
| "loss": 0.4532, |
| "nll_loss": 1.234375, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": 1.4243896007537842, |
| "rewards/margins": 2.1460938453674316, |
| "rewards/rejected": -0.7232910394668579, |
| "step": 1310 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_logits/chosen": 0.2664245665073395, |
| "eval_logits/rejected": 0.09165038913488388, |
| "eval_logps/chosen": -413.79998779296875, |
| "eval_logps/rejected": -411.79998779296875, |
| "eval_loss": 0.3644465506076813, |
| "eval_nll_loss": 1.1941406726837158, |
| "eval_rewards/accuracies": 0.762499988079071, |
| "eval_rewards/chosen": 1.34765625, |
| "eval_rewards/margins": 2.319140672683716, |
| "eval_rewards/rejected": -0.971630871295929, |
| "eval_runtime": 6.1397, |
| "eval_samples_per_second": 12.378, |
| "eval_steps_per_second": 1.629, |
| "step": 1319 |
| }, |
| { |
| "epoch": 1.0, |
| "step": 1319, |
| "total_flos": 0.0, |
| "train_loss": 0.45258238606601164, |
| "train_runtime": 2337.5511, |
| "train_samples_per_second": 4.512, |
| "train_steps_per_second": 0.564 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 1319, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|