{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.4005668398677374, "eval_steps": 500, "global_step": 742, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.000539847493083204, "grad_norm": 0.1942079821469081, "learning_rate": 2.0000000000000003e-06, "log_odds_chosen": -0.103759765625, "log_odds_ratio": -0.74951171875, "logits/chosen": 0.03173828125, "logits/rejected": 0.23210906982421875, "logps/chosen": -0.5159912109375, "logps/rejected": -0.4903564453125, "loss": 9.7798, "nll_loss": 0.52685546875, "rewards/accuracies": 0.0625, "rewards/chosen": -0.05159759521484375, "rewards/margins": -0.00254058837890625, "rewards/rejected": -0.0490570068359375, "step": 1 }, { "epoch": 0.001079694986166408, "grad_norm": 0.19925476484071833, "learning_rate": 4.000000000000001e-06, "log_odds_chosen": -0.1312255859375, "log_odds_ratio": -0.762939453125, "logits/chosen": 0.0579986572265625, "logits/rejected": 0.242279052734375, "logps/chosen": -0.605224609375, "logps/rejected": -0.5435791015625, "loss": 9.5518, "nll_loss": 0.6063232421875, "rewards/accuracies": 0.0625, "rewards/chosen": -0.06048583984375, "rewards/margins": -0.0061187744140625, "rewards/rejected": -0.0543670654296875, "step": 2 }, { "epoch": 0.001619542479249612, "grad_norm": 0.21306866898113327, "learning_rate": 6e-06, "log_odds_chosen": -0.16070556640625, "log_odds_ratio": -0.780029296875, "logits/chosen": -0.0687255859375, "logits/rejected": 0.090667724609375, "logps/chosen": -0.460205078125, "logps/rejected": -0.403076171875, "loss": 9.3716, "nll_loss": 0.4815673828125, "rewards/accuracies": 0.0625, "rewards/chosen": -0.045989990234375, "rewards/margins": -0.00567626953125, "rewards/rejected": -0.040313720703125, "step": 3 }, { "epoch": 0.002159389972332816, "grad_norm": 0.19535862371047616, "learning_rate": 8.000000000000001e-06, "log_odds_chosen": -0.0491943359375, "log_odds_ratio": -0.7236328125, "logits/chosen": 0.23223876953125, "logits/rejected": 0.32306671142578125, "logps/chosen": -0.53228759765625, "logps/rejected": -0.51019287109375, "loss": 9.1763, "nll_loss": 0.56005859375, "rewards/accuracies": 0.125, "rewards/chosen": -0.0532379150390625, "rewards/margins": -0.00222015380859375, "rewards/rejected": -0.05101776123046875, "step": 4 }, { "epoch": 0.00269923746541602, "grad_norm": 0.21604279668851673, "learning_rate": 1e-05, "log_odds_chosen": -0.26092529296875, "log_odds_ratio": -0.841064453125, "logits/chosen": -0.0345458984375, "logits/rejected": 0.12232255935668945, "logps/chosen": -0.782958984375, "logps/rejected": -0.63299560546875, "loss": 10.3457, "nll_loss": 0.79022216796875, "rewards/accuracies": 0.0, "rewards/chosen": -0.07833099365234375, "rewards/margins": -0.0150604248046875, "rewards/rejected": -0.06327056884765625, "step": 5 }, { "epoch": 0.003239084958499224, "grad_norm": 0.17579180511913786, "learning_rate": 1.2e-05, "log_odds_chosen": -0.10174560546875, "log_odds_ratio": -0.74658203125, "logits/chosen": 0.181793212890625, "logits/rejected": 0.3216094970703125, "logps/chosen": -0.5556640625, "logps/rejected": -0.513671875, "loss": 9.1895, "nll_loss": 0.557861328125, "rewards/accuracies": 0.125, "rewards/chosen": -0.055572509765625, "rewards/margins": -0.00424957275390625, "rewards/rejected": -0.05132293701171875, "step": 6 }, { "epoch": 0.003778932451582428, "grad_norm": 0.17485322290856842, "learning_rate": 1.4e-05, "log_odds_chosen": -0.10986328125, "log_odds_ratio": -0.750244140625, "logits/chosen": -0.0716552734375, "logits/rejected": 0.04388427734375, "logps/chosen": -0.4420166015625, "logps/rejected": -0.4063720703125, "loss": 8.9697, "nll_loss": 0.44671630859375, "rewards/accuracies": 0.0, "rewards/chosen": -0.04419708251953125, "rewards/margins": -0.003566741943359375, "rewards/rejected": -0.040630340576171875, "step": 7 }, { "epoch": 0.004318779944665632, "grad_norm": 0.2136311486968946, "learning_rate": 1.6000000000000003e-05, "log_odds_chosen": -0.1390380859375, "log_odds_ratio": -0.767578125, "logits/chosen": -0.01244354248046875, "logits/rejected": 0.15679168701171875, "logps/chosen": -0.53466796875, "logps/rejected": -0.4752197265625, "loss": 10.0063, "nll_loss": 0.5389404296875, "rewards/accuracies": 0.0625, "rewards/chosen": -0.05348968505859375, "rewards/margins": -0.00592803955078125, "rewards/rejected": -0.0475616455078125, "step": 8 }, { "epoch": 0.004858627437748836, "grad_norm": 0.21896041080343484, "learning_rate": 1.8e-05, "log_odds_chosen": -0.08544921875, "log_odds_ratio": -0.738037109375, "logits/chosen": 0.17606353759765625, "logits/rejected": 0.291229248046875, "logps/chosen": -0.48907470703125, "logps/rejected": -0.45611572265625, "loss": 10.7256, "nll_loss": 0.49542236328125, "rewards/accuracies": 0.1875, "rewards/chosen": -0.04892730712890625, "rewards/margins": -0.00330352783203125, "rewards/rejected": -0.045623779296875, "step": 9 }, { "epoch": 0.00539847493083204, "grad_norm": 0.215856169878579, "learning_rate": 2e-05, "log_odds_chosen": -0.0986328125, "log_odds_ratio": -0.74609375, "logits/chosen": 0.196624755859375, "logits/rejected": 0.3164215087890625, "logps/chosen": -0.5498046875, "logps/rejected": -0.5125732421875, "loss": 9.9707, "nll_loss": 0.556640625, "rewards/accuracies": 0.0625, "rewards/chosen": -0.05498504638671875, "rewards/margins": -0.00374603271484375, "rewards/rejected": -0.051239013671875, "step": 10 }, { "epoch": 0.005938322423915244, "grad_norm": 0.2349629867119348, "learning_rate": 1.9999985471560335e-05, "log_odds_chosen": -0.0865478515625, "log_odds_ratio": -0.739990234375, "logits/chosen": 0.071441650390625, "logits/rejected": 0.12422370910644531, "logps/chosen": -0.535400390625, "logps/rejected": -0.510009765625, "loss": 9.8784, "nll_loss": 0.5374755859375, "rewards/accuracies": 0.1875, "rewards/chosen": -0.053558349609375, "rewards/margins": -0.00250244140625, "rewards/rejected": -0.051055908203125, "step": 11 }, { "epoch": 0.006478169916998448, "grad_norm": 0.21983986696361196, "learning_rate": 1.999994188628356e-05, "log_odds_chosen": -0.2069091796875, "log_odds_ratio": -0.808349609375, "logits/chosen": -0.042346954345703125, "logits/rejected": 0.13008928298950195, "logps/chosen": -0.69793701171875, "logps/rejected": -0.58380126953125, "loss": 9.9575, "nll_loss": 0.694580078125, "rewards/accuracies": 0.0, "rewards/chosen": -0.0698394775390625, "rewards/margins": -0.01143646240234375, "rewards/rejected": -0.05840301513671875, "step": 12 }, { "epoch": 0.007018017410081652, "grad_norm": 0.2092001336843407, "learning_rate": 1.9999869244296316e-05, "log_odds_chosen": -0.16900634765625, "log_odds_ratio": -0.78515625, "logits/chosen": 0.13269805908203125, "logits/rejected": 0.2973136901855469, "logps/chosen": -0.62451171875, "logps/rejected": -0.54107666015625, "loss": 10.0923, "nll_loss": 0.64581298828125, "rewards/accuracies": 0.0625, "rewards/chosen": -0.06244659423828125, "rewards/margins": -0.00838470458984375, "rewards/rejected": -0.0540618896484375, "step": 13 }, { "epoch": 0.007557864903164856, "grad_norm": 0.19690498542680196, "learning_rate": 1.999976754580968e-05, "log_odds_chosen": -0.111572265625, "log_odds_ratio": -0.7509765625, "logits/chosen": 0.2082061767578125, "logits/rejected": 0.3197815418243408, "logps/chosen": -0.45953369140625, "logps/rejected": -0.42266845703125, "loss": 9.4282, "nll_loss": 0.4749755859375, "rewards/accuracies": 0.0, "rewards/chosen": -0.04595947265625, "rewards/margins": -0.003681182861328125, "rewards/rejected": -0.042278289794921875, "step": 14 }, { "epoch": 0.00809771239624806, "grad_norm": 0.21636616206088014, "learning_rate": 1.9999636791119153e-05, "log_odds_chosen": -0.08746337890625, "log_odds_ratio": -0.742431640625, "logits/chosen": 0.271514892578125, "logits/rejected": 0.4210052490234375, "logps/chosen": -0.60980224609375, "logps/rejected": -0.5528564453125, "loss": 9.4917, "nll_loss": 0.61328125, "rewards/accuracies": 0.25, "rewards/chosen": -0.061004638671875, "rewards/margins": -0.00574493408203125, "rewards/rejected": -0.05525970458984375, "step": 15 }, { "epoch": 0.008637559889331264, "grad_norm": 0.2350308979148335, "learning_rate": 1.999947698060467e-05, "log_odds_chosen": -0.1563720703125, "log_odds_ratio": -0.77880859375, "logits/chosen": 0.144287109375, "logits/rejected": 0.2863311767578125, "logps/chosen": -0.553955078125, "logps/rejected": -0.4776611328125, "loss": 10.6631, "nll_loss": 0.564208984375, "rewards/accuracies": 0.0625, "rewards/chosen": -0.055419921875, "rewards/margins": -0.0076751708984375, "rewards/rejected": -0.0477447509765625, "step": 16 }, { "epoch": 0.009177407382414468, "grad_norm": 0.21516798355855707, "learning_rate": 1.9999288114730593e-05, "log_odds_chosen": -0.10845947265625, "log_odds_ratio": -0.750732421875, "logits/chosen": 0.0314178466796875, "logits/rejected": 0.23681640625, "logps/chosen": -0.4844970703125, "logps/rejected": -0.44415283203125, "loss": 9.5615, "nll_loss": 0.4989013671875, "rewards/accuracies": 0.0625, "rewards/chosen": -0.048431396484375, "rewards/margins": -0.00402069091796875, "rewards/rejected": -0.04441070556640625, "step": 17 }, { "epoch": 0.009717254875497672, "grad_norm": 0.20266173299384632, "learning_rate": 1.99990701940457e-05, "log_odds_chosen": -0.1265869140625, "log_odds_ratio": -0.759521484375, "logits/chosen": 0.10955810546875, "logits/rejected": 0.330322265625, "logps/chosen": -0.5015869140625, "logps/rejected": -0.451904296875, "loss": 9.7681, "nll_loss": 0.514404296875, "rewards/accuracies": 0.0, "rewards/chosen": -0.05014801025390625, "rewards/margins": -0.00498199462890625, "rewards/rejected": -0.045166015625, "step": 18 }, { "epoch": 0.010257102368580876, "grad_norm": 0.1986515391570371, "learning_rate": 1.9998823219183208e-05, "log_odds_chosen": -0.13427734375, "log_odds_ratio": -0.763427734375, "logits/chosen": 0.35520172119140625, "logits/rejected": 0.4933319091796875, "logps/chosen": -0.52178955078125, "logps/rejected": -0.46856689453125, "loss": 8.8672, "nll_loss": 0.53363037109375, "rewards/accuracies": 0.0625, "rewards/chosen": -0.05219268798828125, "rewards/margins": -0.005340576171875, "rewards/rejected": -0.04685211181640625, "step": 19 }, { "epoch": 0.01079694986166408, "grad_norm": 0.19475180850695326, "learning_rate": 1.9998547190860745e-05, "log_odds_chosen": -0.0792236328125, "log_odds_ratio": -0.737548828125, "logits/chosen": 0.29610443115234375, "logits/rejected": 0.4187026023864746, "logps/chosen": -0.5634765625, "logps/rejected": -0.5435791015625, "loss": 9.48, "nll_loss": 0.56793212890625, "rewards/accuracies": 0.0625, "rewards/chosen": -0.05634307861328125, "rewards/margins": -0.00193023681640625, "rewards/rejected": -0.054412841796875, "step": 20 }, { "epoch": 0.011336797354747285, "grad_norm": 0.19909001197923398, "learning_rate": 1.9998242109880362e-05, "log_odds_chosen": -0.1043701171875, "log_odds_ratio": -0.748291015625, "logits/chosen": 0.11765289306640625, "logits/rejected": 0.1923065185546875, "logps/chosen": -0.44677734375, "logps/rejected": -0.4144287109375, "loss": 8.8096, "nll_loss": 0.452392578125, "rewards/accuracies": 0.0625, "rewards/chosen": -0.0446624755859375, "rewards/margins": -0.003204345703125, "rewards/rejected": -0.0414581298828125, "step": 21 }, { "epoch": 0.011876644847830489, "grad_norm": 0.21484833380312818, "learning_rate": 1.9997907977128537e-05, "log_odds_chosen": -0.009521484375, "log_odds_ratio": -0.69970703125, "logits/chosen": 0.18085479736328125, "logits/rejected": 0.2681732177734375, "logps/chosen": -0.43701171875, "logps/rejected": -0.434326171875, "loss": 9.4087, "nll_loss": 0.4544677734375, "rewards/accuracies": 0.25, "rewards/chosen": -0.04367828369140625, "rewards/margins": -0.00025177001953125, "rewards/rejected": -0.043426513671875, "step": 22 }, { "epoch": 0.012416492340913693, "grad_norm": 0.20566961829387942, "learning_rate": 1.9997544793576146e-05, "log_odds_chosen": -0.0791015625, "log_odds_ratio": -0.734619140625, "logits/chosen": 0.22935104370117188, "logits/rejected": 0.3308219909667969, "logps/chosen": -0.491455078125, "logps/rejected": -0.463134765625, "loss": 9.96, "nll_loss": 0.4935302734375, "rewards/accuracies": 0.125, "rewards/chosen": -0.04913330078125, "rewards/margins": -0.00279998779296875, "rewards/rejected": -0.04633331298828125, "step": 23 }, { "epoch": 0.012956339833996897, "grad_norm": 0.18699057699180088, "learning_rate": 1.999715256027849e-05, "log_odds_chosen": -0.16082763671875, "log_odds_ratio": -0.77783203125, "logits/chosen": 0.120697021484375, "logits/rejected": 0.30518150329589844, "logps/chosen": -0.4693603515625, "logps/rejected": -0.41302490234375, "loss": 8.7817, "nll_loss": 0.474853515625, "rewards/accuracies": 0.0, "rewards/chosen": -0.04693603515625, "rewards/margins": -0.0056304931640625, "rewards/rejected": -0.0413055419921875, "step": 24 }, { "epoch": 0.0134961873270801, "grad_norm": 0.22113906312138432, "learning_rate": 1.9996731278375277e-05, "log_odds_chosen": -0.103759765625, "log_odds_ratio": -0.747802734375, "logits/chosen": 0.016195297241210938, "logits/rejected": 0.187286376953125, "logps/chosen": -0.40704345703125, "logps/rejected": -0.37261962890625, "loss": 9.9243, "nll_loss": 0.4219970703125, "rewards/accuracies": 0.0625, "rewards/chosen": -0.04073333740234375, "rewards/margins": -0.00347137451171875, "rewards/rejected": -0.037261962890625, "step": 25 }, { "epoch": 0.014036034820163305, "grad_norm": 0.2241204420314347, "learning_rate": 1.999628094909062e-05, "log_odds_chosen": -0.0667724609375, "log_odds_ratio": -0.730712890625, "logits/chosen": 0.195953369140625, "logits/rejected": 0.2894287109375, "logps/chosen": -0.56524658203125, "logps/rejected": -0.52215576171875, "loss": 10.6411, "nll_loss": 0.5712890625, "rewards/accuracies": 0.1875, "rewards/chosen": -0.056549072265625, "rewards/margins": -0.00435638427734375, "rewards/rejected": -0.05219268798828125, "step": 26 }, { "epoch": 0.014575882313246509, "grad_norm": 0.19086699499679274, "learning_rate": 1.999580157373304e-05, "log_odds_chosen": -0.1312255859375, "log_odds_ratio": -0.7626953125, "logits/chosen": 0.2828369140625, "logits/rejected": 0.55926513671875, "logps/chosen": -0.5262451171875, "logps/rejected": -0.474853515625, "loss": 9.3721, "nll_loss": 0.54541015625, "rewards/accuracies": 0.0, "rewards/chosen": -0.052642822265625, "rewards/margins": -0.00513458251953125, "rewards/rejected": -0.04750823974609375, "step": 27 }, { "epoch": 0.015115729806329713, "grad_norm": 0.2030232762926025, "learning_rate": 1.9995293153695445e-05, "log_odds_chosen": -0.0809326171875, "log_odds_ratio": -0.736083984375, "logits/chosen": 0.28302001953125, "logits/rejected": 0.4391021728515625, "logps/chosen": -0.4979248046875, "logps/rejected": -0.465087890625, "loss": 9.2275, "nll_loss": 0.50146484375, "rewards/accuracies": 0.1875, "rewards/chosen": -0.04978179931640625, "rewards/margins": -0.003265380859375, "rewards/rejected": -0.04651641845703125, "step": 28 }, { "epoch": 0.015655577299412915, "grad_norm": 0.2135821474904866, "learning_rate": 1.9994755690455154e-05, "log_odds_chosen": -0.0367431640625, "log_odds_ratio": -0.713134765625, "logits/chosen": 0.14898681640625, "logits/rejected": 0.24535751342773438, "logps/chosen": -0.49658203125, "logps/rejected": -0.4866943359375, "loss": 9.4268, "nll_loss": 0.5047607421875, "rewards/accuracies": 0.25, "rewards/chosen": -0.04962158203125, "rewards/margins": -0.00095367431640625, "rewards/rejected": -0.04866790771484375, "step": 29 }, { "epoch": 0.01619542479249612, "grad_norm": 0.20326483670789466, "learning_rate": 1.9994189185573852e-05, "log_odds_chosen": -0.087890625, "log_odds_ratio": -0.739013671875, "logits/chosen": 0.2962989807128906, "logits/rejected": 0.43933868408203125, "logps/chosen": -0.6378173828125, "logps/rejected": -0.5882568359375, "loss": 9.9258, "nll_loss": 0.648193359375, "rewards/accuracies": 0.0625, "rewards/chosen": -0.0637359619140625, "rewards/margins": -0.00492095947265625, "rewards/rejected": -0.05881500244140625, "step": 30 }, { "epoch": 0.016735272285579323, "grad_norm": 0.21423201793368668, "learning_rate": 1.9993593640697644e-05, "log_odds_chosen": -0.03326416015625, "log_odds_ratio": -0.7119140625, "logits/chosen": 0.105499267578125, "logits/rejected": 0.25384521484375, "logps/chosen": -0.46356201171875, "logps/rejected": -0.4464111328125, "loss": 9.4155, "nll_loss": 0.46624755859375, "rewards/accuracies": 0.25, "rewards/chosen": -0.0464019775390625, "rewards/margins": -0.00179290771484375, "rewards/rejected": -0.04460906982421875, "step": 31 }, { "epoch": 0.01727511977866253, "grad_norm": 25.49488572774, "learning_rate": 1.9992969057556988e-05, "log_odds_chosen": 2.09033203125, "log_odds_ratio": -0.28469085693359375, "logits/chosen": -0.003978729248046875, "logits/rejected": 0.8504638671875, "logps/chosen": -0.483642578125, "logps/rejected": -1.9393310546875, "loss": 10.0964, "nll_loss": 0.482666015625, "rewards/accuracies": 0.75, "rewards/chosen": -0.0483245849609375, "rewards/margins": 0.145599365234375, "rewards/rejected": -0.1940765380859375, "step": 32 }, { "epoch": 0.01781496727174573, "grad_norm": 0.23647784317168868, "learning_rate": 1.9992315437966726e-05, "log_odds_chosen": -0.04736328125, "log_odds_ratio": -0.71826171875, "logits/chosen": -0.06860318779945374, "logits/rejected": 0.11444091796875, "logps/chosen": -0.5087890625, "logps/rejected": -0.4852294921875, "loss": 9.3313, "nll_loss": 0.51123046875, "rewards/accuracies": 0.1875, "rewards/chosen": -0.05088043212890625, "rewards/margins": -0.00234222412109375, "rewards/rejected": -0.0485382080078125, "step": 33 }, { "epoch": 0.018354814764828937, "grad_norm": 0.20883831737185038, "learning_rate": 1.9991632783826076e-05, "log_odds_chosen": -0.150390625, "log_odds_ratio": -0.773681640625, "logits/chosen": -0.0989837646484375, "logits/rejected": 0.187286376953125, "logps/chosen": -0.65380859375, "logps/rejected": -0.574462890625, "loss": 9.3945, "nll_loss": 0.6519775390625, "rewards/accuracies": 0.0625, "rewards/chosen": -0.0654144287109375, "rewards/margins": -0.0079803466796875, "rewards/rejected": -0.05743408203125, "step": 34 }, { "epoch": 0.01889466225791214, "grad_norm": 0.2294748292137733, "learning_rate": 1.9990921097118615e-05, "log_odds_chosen": -0.0672607421875, "log_odds_ratio": -0.729736328125, "logits/chosen": 0.21051788330078125, "logits/rejected": 0.3704719543457031, "logps/chosen": -0.5533447265625, "logps/rejected": -0.514892578125, "loss": 10.1279, "nll_loss": 0.55810546875, "rewards/accuracies": 0.25, "rewards/chosen": -0.0553741455078125, "rewards/margins": -0.0038909912109375, "rewards/rejected": -0.051483154296875, "step": 35 }, { "epoch": 0.019434509750995345, "grad_norm": 0.19537415232488894, "learning_rate": 1.999018037991229e-05, "log_odds_chosen": -0.073486328125, "log_odds_ratio": -0.73193359375, "logits/chosen": 0.1170196533203125, "logits/rejected": 0.305938720703125, "logps/chosen": -0.5345458984375, "logps/rejected": -0.49658203125, "loss": 8.7642, "nll_loss": 0.5411376953125, "rewards/accuracies": 0.3125, "rewards/chosen": -0.05344390869140625, "rewards/margins": -0.0037841796875, "rewards/rejected": -0.04965972900390625, "step": 36 }, { "epoch": 0.019974357244078547, "grad_norm": 0.21129581549786267, "learning_rate": 1.9989410634359382e-05, "log_odds_chosen": -0.0313720703125, "log_odds_ratio": -0.709228515625, "logits/chosen": 0.20598602294921875, "logits/rejected": 0.3185577392578125, "logps/chosen": -0.46014404296875, "logps/rejected": -0.44464111328125, "loss": 10.0381, "nll_loss": 0.47021484375, "rewards/accuracies": 0.25, "rewards/chosen": -0.045989990234375, "rewards/margins": -0.00156402587890625, "rewards/rejected": -0.04442596435546875, "step": 37 }, { "epoch": 0.020514204737161753, "grad_norm": 0.21256624381627698, "learning_rate": 1.9988611862696542e-05, "log_odds_chosen": -0.1033935546875, "log_odds_ratio": -0.750244140625, "logits/chosen": 0.457763671875, "logits/rejected": 0.6049118041992188, "logps/chosen": -0.6748046875, "logps/rejected": -0.6075439453125, "loss": 9.9648, "nll_loss": 0.68902587890625, "rewards/accuracies": 0.25, "rewards/chosen": -0.067474365234375, "rewards/margins": -0.0066986083984375, "rewards/rejected": -0.0607757568359375, "step": 38 }, { "epoch": 0.021054052230244955, "grad_norm": 0.21011955119846062, "learning_rate": 1.9987784067244748e-05, "log_odds_chosen": -0.04345703125, "log_odds_ratio": -0.7177734375, "logits/chosen": 0.07346343994140625, "logits/rejected": 0.2229766845703125, "logps/chosen": -0.5589599609375, "logps/rejected": -0.5235595703125, "loss": 10.4985, "nll_loss": 0.5596923828125, "rewards/accuracies": 0.3125, "rewards/chosen": -0.055908203125, "rewards/margins": -0.003509521484375, "rewards/rejected": -0.052398681640625, "step": 39 }, { "epoch": 0.02159389972332816, "grad_norm": 0.18655575716893655, "learning_rate": 1.9986927250409313e-05, "log_odds_chosen": -0.0023193359375, "log_odds_ratio": -0.69482421875, "logits/chosen": 0.047061920166015625, "logits/rejected": 0.172576904296875, "logps/chosen": -0.44921875, "logps/rejected": -0.4466552734375, "loss": 8.689, "nll_loss": 0.448974609375, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0449371337890625, "rewards/margins": -0.0002593994140625, "rewards/rejected": -0.044677734375, "step": 40 }, { "epoch": 0.022133747216411363, "grad_norm": 0.2151008997517863, "learning_rate": 1.998604141467988e-05, "log_odds_chosen": -0.02593994140625, "log_odds_ratio": -0.707763671875, "logits/chosen": 0.3104301691055298, "logits/rejected": 0.41162109375, "logps/chosen": -0.58917236328125, "logps/rejected": -0.563720703125, "loss": 9.6162, "nll_loss": 0.59234619140625, "rewards/accuracies": 0.375, "rewards/chosen": -0.0588531494140625, "rewards/margins": -0.00246429443359375, "rewards/rejected": -0.05638885498046875, "step": 41 }, { "epoch": 0.02267359470949457, "grad_norm": 0.3079843907586099, "learning_rate": 1.998512656263041e-05, "log_odds_chosen": -0.001953125, "log_odds_ratio": -0.7049560546875, "logits/chosen": 0.06647872924804688, "logits/rejected": 0.140777587890625, "logps/chosen": -0.524169921875, "logps/rejected": -0.4959716796875, "loss": 9.6821, "nll_loss": 0.52294921875, "rewards/accuracies": 0.375, "rewards/chosen": -0.0524444580078125, "rewards/margins": -0.002838134765625, "rewards/rejected": -0.0496063232421875, "step": 42 }, { "epoch": 0.02321344220257777, "grad_norm": 0.2828911148195539, "learning_rate": 1.9984182696919185e-05, "log_odds_chosen": 0.15228271484375, "log_odds_ratio": -0.6259765625, "logits/chosen": 0.176177978515625, "logits/rejected": 0.323883056640625, "logps/chosen": -0.56951904296875, "logps/rejected": -0.612060546875, "loss": 9.4641, "nll_loss": 0.58441162109375, "rewards/accuracies": 0.75, "rewards/chosen": -0.0569305419921875, "rewards/margins": 0.0043182373046875, "rewards/rejected": -0.061248779296875, "step": 43 }, { "epoch": 0.023753289695660977, "grad_norm": 0.30824065326653327, "learning_rate": 1.9983209820288776e-05, "log_odds_chosen": 0.3564453125, "log_odds_ratio": -0.54638671875, "logits/chosen": 0.4134674072265625, "logits/rejected": 0.4789085388183594, "logps/chosen": -0.5711669921875, "logps/rejected": -0.7423095703125, "loss": 10.1455, "nll_loss": 0.5819091796875, "rewards/accuracies": 0.9375, "rewards/chosen": -0.05710601806640625, "rewards/margins": 0.01712799072265625, "rewards/rejected": -0.0742340087890625, "step": 44 }, { "epoch": 0.02429313718874418, "grad_norm": 0.22188511562018282, "learning_rate": 1.998220793556606e-05, "log_odds_chosen": 0.5517578125, "log_odds_ratio": -0.473388671875, "logits/chosen": 0.21323776245117188, "logits/rejected": 0.22760391235351562, "logps/chosen": -0.49267578125, "logps/rejected": -0.74462890625, "loss": 8.8364, "nll_loss": 0.4964599609375, "rewards/accuracies": 0.9375, "rewards/chosen": -0.04925537109375, "rewards/margins": 0.025177001953125, "rewards/rejected": -0.074432373046875, "step": 45 }, { "epoch": 0.024832984681827385, "grad_norm": 0.5177579709810056, "learning_rate": 1.9981177045662203e-05, "log_odds_chosen": 0.39617919921875, "log_odds_ratio": -0.53668212890625, "logits/chosen": 0.4163665771484375, "logits/rejected": 0.43804931640625, "logps/chosen": -0.65234375, "logps/rejected": -0.8427734375, "loss": 10.3496, "nll_loss": 0.6490478515625, "rewards/accuracies": 0.75, "rewards/chosen": -0.0652618408203125, "rewards/margins": 0.0190277099609375, "rewards/rejected": -0.0842742919921875, "step": 46 }, { "epoch": 0.025372832174910587, "grad_norm": 0.3041298360257454, "learning_rate": 1.9980117153572652e-05, "log_odds_chosen": 0.396240234375, "log_odds_ratio": -0.5328369140625, "logits/chosen": 0.0301513671875, "logits/rejected": 0.07933807373046875, "logps/chosen": -0.43756103515625, "logps/rejected": -0.5853271484375, "loss": 8.3831, "nll_loss": 0.44805908203125, "rewards/accuracies": 0.875, "rewards/chosen": -0.0437469482421875, "rewards/margins": 0.01477813720703125, "rewards/rejected": -0.05853271484375, "step": 47 }, { "epoch": 0.025912679667993793, "grad_norm": 0.20844361605931697, "learning_rate": 1.997902826237712e-05, "log_odds_chosen": 0.075439453125, "log_odds_ratio": -0.663818359375, "logits/chosen": 0.2189788818359375, "logits/rejected": 0.32183837890625, "logps/chosen": -0.4649658203125, "logps/rejected": -0.49188232421875, "loss": 9.1084, "nll_loss": 0.4918212890625, "rewards/accuracies": 0.875, "rewards/chosen": -0.04647064208984375, "rewards/margins": 0.00272369384765625, "rewards/rejected": -0.0491943359375, "step": 48 }, { "epoch": 0.026452527161076995, "grad_norm": 0.19148187338730321, "learning_rate": 1.997791037523958e-05, "log_odds_chosen": 0.0382080078125, "log_odds_ratio": -0.676513671875, "logits/chosen": -0.018585205078125, "logits/rejected": 0.11871337890625, "logps/chosen": -0.515625, "logps/rejected": -0.521240234375, "loss": 9.1729, "nll_loss": 0.5303955078125, "rewards/accuracies": 0.5625, "rewards/chosen": -0.05152130126953125, "rewards/margins": 0.000640869140625, "rewards/rejected": -0.05216217041015625, "step": 49 }, { "epoch": 0.0269923746541602, "grad_norm": 0.21298119213479585, "learning_rate": 1.9976763495408273e-05, "log_odds_chosen": 0.0189208984375, "log_odds_ratio": -0.68603515625, "logits/chosen": 0.24065399169921875, "logits/rejected": 0.336395263671875, "logps/chosen": -0.5010986328125, "logps/rejected": -0.5029296875, "loss": 9.8477, "nll_loss": 0.514892578125, "rewards/accuracies": 0.4375, "rewards/chosen": -0.05010986328125, "rewards/margins": 0.000152587890625, "rewards/rejected": -0.050262451171875, "step": 50 }, { "epoch": 0.027532222147243404, "grad_norm": 0.21028681096790564, "learning_rate": 1.9975587626215666e-05, "log_odds_chosen": -0.0413818359375, "log_odds_ratio": -0.715087890625, "logits/chosen": 0.2926788330078125, "logits/rejected": 0.411376953125, "logps/chosen": -0.5145263671875, "logps/rejected": -0.497314453125, "loss": 9.9846, "nll_loss": 0.5340576171875, "rewards/accuracies": 0.25, "rewards/chosen": -0.0514678955078125, "rewards/margins": -0.00176239013671875, "rewards/rejected": -0.04970550537109375, "step": 51 }, { "epoch": 0.02807206964032661, "grad_norm": 0.2194208548408586, "learning_rate": 1.9974382771078473e-05, "log_odds_chosen": 0.0081787109375, "log_odds_ratio": -0.697021484375, "logits/chosen": 0.1072998046875, "logits/rejected": 0.2467041015625, "logps/chosen": -0.59735107421875, "logps/rejected": -0.56988525390625, "loss": 10.5664, "nll_loss": 0.60382080078125, "rewards/accuracies": 0.5, "rewards/chosen": -0.05966949462890625, "rewards/margins": -0.00266265869140625, "rewards/rejected": -0.0570068359375, "step": 52 }, { "epoch": 0.02861191713340981, "grad_norm": 0.20979751874921287, "learning_rate": 1.9973148933497625e-05, "log_odds_chosen": -0.0709228515625, "log_odds_ratio": -0.731201171875, "logits/chosen": 0.035190582275390625, "logits/rejected": 0.184234619140625, "logps/chosen": -0.5120849609375, "logps/rejected": -0.477783203125, "loss": 9.7261, "nll_loss": 0.526611328125, "rewards/accuracies": 0.25, "rewards/chosen": -0.0512237548828125, "rewards/margins": -0.00345611572265625, "rewards/rejected": -0.04776763916015625, "step": 53 }, { "epoch": 0.029151764626493017, "grad_norm": 0.1925770828042212, "learning_rate": 1.997188611705827e-05, "log_odds_chosen": -0.0032958984375, "log_odds_ratio": -0.69677734375, "logits/chosen": 0.15976715087890625, "logits/rejected": 0.24672698974609375, "logps/chosen": -0.50244140625, "logps/rejected": -0.4996337890625, "loss": 8.9121, "nll_loss": 0.53515625, "rewards/accuracies": 0.25, "rewards/chosen": -0.05019378662109375, "rewards/margins": -0.00022125244140625, "rewards/rejected": -0.0499725341796875, "step": 54 }, { "epoch": 0.02969161211957622, "grad_norm": 0.22352585229939453, "learning_rate": 1.9970594325429755e-05, "log_odds_chosen": -0.075927734375, "log_odds_ratio": -0.733154296875, "logits/chosen": 0.291473388671875, "logits/rejected": 0.44415283203125, "logps/chosen": -0.6348876953125, "logps/rejected": -0.5946044921875, "loss": 10.354, "nll_loss": 0.646240234375, "rewards/accuracies": 0.25, "rewards/chosen": -0.0635223388671875, "rewards/margins": -0.00408935546875, "rewards/rejected": -0.0594329833984375, "step": 55 }, { "epoch": 0.030231459612659425, "grad_norm": 0.1953034828883627, "learning_rate": 1.9969273562365626e-05, "log_odds_chosen": -0.06060791015625, "log_odds_ratio": -0.72509765625, "logits/chosen": 0.22074127197265625, "logits/rejected": 0.34014892578125, "logps/chosen": -0.4967041015625, "logps/rejected": -0.46759033203125, "loss": 9.8691, "nll_loss": 0.50836181640625, "rewards/accuracies": 0.3125, "rewards/chosen": -0.0496368408203125, "rewards/margins": -0.00286102294921875, "rewards/rejected": -0.04677581787109375, "step": 56 }, { "epoch": 0.030771307105742628, "grad_norm": 0.20205541202632454, "learning_rate": 1.9967923831703608e-05, "log_odds_chosen": 0.00714111328125, "log_odds_ratio": -0.69091796875, "logits/chosen": 0.322021484375, "logits/rejected": 0.5377511978149414, "logps/chosen": -0.51849365234375, "logps/rejected": -0.5157470703125, "loss": 9.564, "nll_loss": 0.53179931640625, "rewards/accuracies": 0.375, "rewards/chosen": -0.05181884765625, "rewards/margins": -0.000244140625, "rewards/rejected": -0.05157470703125, "step": 57 }, { "epoch": 0.03131115459882583, "grad_norm": 0.1932655482232203, "learning_rate": 1.9966545137365595e-05, "log_odds_chosen": -0.0303955078125, "log_odds_ratio": -0.708740234375, "logits/chosen": 0.2806243896484375, "logits/rejected": 0.466217041015625, "logps/chosen": -0.5106201171875, "logps/rejected": -0.49462890625, "loss": 9.1475, "nll_loss": 0.519775390625, "rewards/accuracies": 0.4375, "rewards/chosen": -0.0510711669921875, "rewards/margins": -0.0015869140625, "rewards/rejected": -0.0494842529296875, "step": 58 }, { "epoch": 0.03185100209190903, "grad_norm": 0.17637007874402608, "learning_rate": 1.9965137483357648e-05, "log_odds_chosen": -0.07403564453125, "log_odds_ratio": -0.732666015625, "logits/chosen": 0.1884288787841797, "logits/rejected": 0.3271026611328125, "logps/chosen": -0.553466796875, "logps/rejected": -0.51177978515625, "loss": 8.918, "nll_loss": 0.56060791015625, "rewards/accuracies": 0.25, "rewards/chosen": -0.05535888671875, "rewards/margins": -0.00418853759765625, "rewards/rejected": -0.05117034912109375, "step": 59 }, { "epoch": 0.03239084958499224, "grad_norm": 0.2057486088258782, "learning_rate": 1.9963700873769966e-05, "log_odds_chosen": -0.0130615234375, "log_odds_ratio": -0.7001953125, "logits/chosen": 0.319366455078125, "logits/rejected": 0.49102020263671875, "logps/chosen": -0.5355224609375, "logps/rejected": -0.5252685546875, "loss": 9.6855, "nll_loss": 0.5458984375, "rewards/accuracies": 0.375, "rewards/chosen": -0.05353546142578125, "rewards/margins": -0.00102996826171875, "rewards/rejected": -0.0525054931640625, "step": 60 }, { "epoch": 0.032930697078075444, "grad_norm": 0.21187221837503606, "learning_rate": 1.9962235312776893e-05, "log_odds_chosen": -0.04052734375, "log_odds_ratio": -0.715087890625, "logits/chosen": 0.334716796875, "logits/rejected": 0.3655548095703125, "logps/chosen": -0.539794921875, "logps/rejected": -0.51953125, "loss": 10.229, "nll_loss": 0.5582275390625, "rewards/accuracies": 0.375, "rewards/chosen": -0.05402374267578125, "rewards/margins": -0.0020904541015625, "rewards/rejected": -0.05193328857421875, "step": 61 }, { "epoch": 0.033470544571158646, "grad_norm": 0.17671295650251345, "learning_rate": 1.996074080463688e-05, "log_odds_chosen": -0.0159912109375, "log_odds_ratio": -0.701416015625, "logits/chosen": 0.18841552734375, "logits/rejected": 0.3006744384765625, "logps/chosen": -0.45263671875, "logps/rejected": -0.4403076171875, "loss": 8.9751, "nll_loss": 0.45458984375, "rewards/accuracies": 0.5, "rewards/chosen": -0.04531097412109375, "rewards/margins": -0.0012664794921875, "rewards/rejected": -0.04404449462890625, "step": 62 }, { "epoch": 0.03401039206424185, "grad_norm": 0.17728631823126625, "learning_rate": 1.9959217353692516e-05, "log_odds_chosen": -0.0479736328125, "log_odds_ratio": -0.717529296875, "logits/chosen": 0.490234375, "logits/rejected": 0.6383056640625, "logps/chosen": -0.52490234375, "logps/rejected": -0.5042724609375, "loss": 9.2207, "nll_loss": 0.541015625, "rewards/accuracies": 0.125, "rewards/chosen": -0.0524444580078125, "rewards/margins": -0.00200653076171875, "rewards/rejected": -0.05043792724609375, "step": 63 }, { "epoch": 0.03455023955732506, "grad_norm": 0.19164851531442317, "learning_rate": 1.9957664964370465e-05, "log_odds_chosen": -0.0599365234375, "log_odds_ratio": -0.7255859375, "logits/chosen": 0.187286376953125, "logits/rejected": 0.3094482421875, "logps/chosen": -0.547607421875, "logps/rejected": -0.51708984375, "loss": 9.1123, "nll_loss": 0.55224609375, "rewards/accuracies": 0.3125, "rewards/chosen": -0.054779052734375, "rewards/margins": -0.0030517578125, "rewards/rejected": -0.051727294921875, "step": 64 }, { "epoch": 0.03509008705040826, "grad_norm": 0.21440013104238442, "learning_rate": 1.995608364118149e-05, "log_odds_chosen": -0.0167236328125, "log_odds_ratio": -0.702392578125, "logits/chosen": 0.13726806640625, "logits/rejected": 0.21173095703125, "logps/chosen": -0.48193359375, "logps/rejected": -0.4776611328125, "loss": 10.0264, "nll_loss": 0.489990234375, "rewards/accuracies": 0.375, "rewards/chosen": -0.048187255859375, "rewards/margins": -0.00043487548828125, "rewards/rejected": -0.04775238037109375, "step": 65 }, { "epoch": 0.03562993454349146, "grad_norm": 0.17572507551113214, "learning_rate": 1.9954473388720417e-05, "log_odds_chosen": -0.00390625, "log_odds_ratio": -0.695556640625, "logits/chosen": -0.02402210235595703, "logits/rejected": 0.092254638671875, "logps/chosen": -0.3988037109375, "logps/rejected": -0.3966064453125, "loss": 8.667, "nll_loss": 0.4019775390625, "rewards/accuracies": 0.375, "rewards/chosen": -0.03983306884765625, "rewards/margins": -0.00019073486328125, "rewards/rejected": -0.039642333984375, "step": 66 }, { "epoch": 0.036169782036574664, "grad_norm": 0.21053720647490104, "learning_rate": 1.995283421166614e-05, "log_odds_chosen": 0.012451171875, "log_odds_ratio": -0.687744140625, "logits/chosen": 0.2247467041015625, "logits/rejected": 0.36757659912109375, "logps/chosen": -0.4793701171875, "logps/rejected": -0.4803466796875, "loss": 9.3154, "nll_loss": 0.4974365234375, "rewards/accuracies": 0.4375, "rewards/chosen": -0.047943115234375, "rewards/margins": 7.62939453125e-05, "rewards/rejected": -0.0480194091796875, "step": 67 }, { "epoch": 0.036709629529657874, "grad_norm": 0.1914491423939911, "learning_rate": 1.99511661147816e-05, "log_odds_chosen": -0.03094482421875, "log_odds_ratio": -0.711181640625, "logits/chosen": 0.08795166015625, "logits/rejected": 0.2893562316894531, "logps/chosen": -0.68548583984375, "logps/rejected": -0.65478515625, "loss": 9.9092, "nll_loss": 0.68585205078125, "rewards/accuracies": 0.25, "rewards/chosen": -0.06858062744140625, "rewards/margins": -0.00312042236328125, "rewards/rejected": -0.065460205078125, "step": 68 }, { "epoch": 0.037249477022741076, "grad_norm": 0.19677919879982725, "learning_rate": 1.9949469102913762e-05, "log_odds_chosen": -0.0423583984375, "log_odds_ratio": -0.715576171875, "logits/chosen": 0.23613619804382324, "logits/rejected": 0.4024932384490967, "logps/chosen": -0.5106201171875, "logps/rejected": -0.48974609375, "loss": 9.4956, "nll_loss": 0.5203857421875, "rewards/accuracies": 0.25, "rewards/chosen": -0.051055908203125, "rewards/margins": -0.00206756591796875, "rewards/rejected": -0.04898834228515625, "step": 69 }, { "epoch": 0.03778932451582428, "grad_norm": 0.18965836249484327, "learning_rate": 1.9947743180993617e-05, "log_odds_chosen": -0.050537109375, "log_odds_ratio": -0.7216796875, "logits/chosen": -0.129791259765625, "logits/rejected": 0.054351806640625, "logps/chosen": -0.5616455078125, "logps/rejected": -0.5262451171875, "loss": 9.4062, "nll_loss": 0.5616455078125, "rewards/accuracies": 0.25, "rewards/chosen": -0.0561676025390625, "rewards/margins": -0.00353240966796875, "rewards/rejected": -0.05263519287109375, "step": 70 }, { "epoch": 0.03832917200890748, "grad_norm": 0.18168353265644308, "learning_rate": 1.994598835403615e-05, "log_odds_chosen": -0.01458740234375, "log_odds_ratio": -0.701171875, "logits/chosen": 0.21490478515625, "logits/rejected": 0.31085205078125, "logps/chosen": -0.45989990234375, "logps/rejected": -0.45361328125, "loss": 9.3003, "nll_loss": 0.4666748046875, "rewards/accuracies": 0.5, "rewards/chosen": -0.04602813720703125, "rewards/margins": -0.00067901611328125, "rewards/rejected": -0.04534912109375, "step": 71 }, { "epoch": 0.03886901950199069, "grad_norm": 0.17707909780562253, "learning_rate": 1.9944204627140344e-05, "log_odds_chosen": -0.0447998046875, "log_odds_ratio": -0.71630859375, "logits/chosen": 0.2020111083984375, "logits/rejected": 0.36175537109375, "logps/chosen": -0.579833984375, "logps/rejected": -0.5513916015625, "loss": 9.0229, "nll_loss": 0.594970703125, "rewards/accuracies": 0.25, "rewards/chosen": -0.05797576904296875, "rewards/margins": -0.0028533935546875, "rewards/rejected": -0.05512237548828125, "step": 72 }, { "epoch": 0.03940886699507389, "grad_norm": 0.20322888213732232, "learning_rate": 1.994239200548915e-05, "log_odds_chosen": -0.0772705078125, "log_odds_ratio": -0.738525390625, "logits/chosen": 0.20714378356933594, "logits/rejected": 0.3835182189941406, "logps/chosen": -0.658935546875, "logps/rejected": -0.5992431640625, "loss": 10.5483, "nll_loss": 0.66748046875, "rewards/accuracies": 0.1875, "rewards/chosen": -0.0659027099609375, "rewards/margins": -0.0059967041015625, "rewards/rejected": -0.059906005859375, "step": 73 }, { "epoch": 0.039948714488157094, "grad_norm": 0.17465472867880227, "learning_rate": 1.9940550494349482e-05, "log_odds_chosen": -0.02203369140625, "log_odds_ratio": -0.704345703125, "logits/chosen": 0.317138671875, "logits/rejected": 0.3932647705078125, "logps/chosen": -0.5013427734375, "logps/rejected": -0.49298095703125, "loss": 9.0942, "nll_loss": 0.502197265625, "rewards/accuracies": 0.25, "rewards/chosen": -0.05013275146484375, "rewards/margins": -0.0008544921875, "rewards/rejected": -0.04927825927734375, "step": 74 }, { "epoch": 0.0404885619812403, "grad_norm": 0.21128133335928515, "learning_rate": 1.9938680099072197e-05, "log_odds_chosen": -0.0242919921875, "log_odds_ratio": -0.7132568359375, "logits/chosen": 0.19470596313476562, "logits/rejected": 0.27942657470703125, "logps/chosen": -0.6571044921875, "logps/rejected": -0.656982421875, "loss": 9.9165, "nll_loss": 0.6590576171875, "rewards/accuracies": 0.1875, "rewards/chosen": -0.0656890869140625, "rewards/margins": 2.288818359375e-05, "rewards/rejected": -0.06571197509765625, "step": 75 }, { "epoch": 0.041028409474323506, "grad_norm": 0.18218287190809435, "learning_rate": 1.993678082509208e-05, "log_odds_chosen": -0.0421142578125, "log_odds_ratio": -0.714599609375, "logits/chosen": 0.4266357421875, "logits/rejected": 0.54168701171875, "logps/chosen": -0.505126953125, "logps/rejected": -0.4891357421875, "loss": 9.082, "nll_loss": 0.520263671875, "rewards/accuracies": 0.1875, "rewards/chosen": -0.0505523681640625, "rewards/margins": -0.00164031982421875, "rewards/rejected": -0.04891204833984375, "step": 76 }, { "epoch": 0.04156825696740671, "grad_norm": 0.1843791811038704, "learning_rate": 1.9934852677927834e-05, "log_odds_chosen": 0.0032958984375, "log_odds_ratio": -0.69140625, "logits/chosen": -0.0150604248046875, "logits/rejected": 0.033233642578125, "logps/chosen": -0.38922119140625, "logps/rejected": -0.39154052734375, "loss": 8.7129, "nll_loss": 0.3975830078125, "rewards/accuracies": 0.3125, "rewards/chosen": -0.0389251708984375, "rewards/margins": 0.00019073486328125, "rewards/rejected": -0.03911590576171875, "step": 77 }, { "epoch": 0.04210810446048991, "grad_norm": 0.18813032014335393, "learning_rate": 1.9932895663182044e-05, "log_odds_chosen": 0.00640869140625, "log_odds_ratio": -0.690185546875, "logits/chosen": 0.030487060546875, "logits/rejected": 0.11048507690429688, "logps/chosen": -0.409912109375, "logps/rejected": -0.41021728515625, "loss": 9.2554, "nll_loss": 0.4158935546875, "rewards/accuracies": 0.3125, "rewards/chosen": -0.040985107421875, "rewards/margins": 3.0517578125e-05, "rewards/rejected": -0.041015625, "step": 78 }, { "epoch": 0.04264795195357311, "grad_norm": 0.21056081510394228, "learning_rate": 1.9930909786541185e-05, "log_odds_chosen": -0.0623779296875, "log_odds_ratio": -0.725830078125, "logits/chosen": 0.15869140625, "logits/rejected": 0.27471923828125, "logps/chosen": -0.613525390625, "logps/rejected": -0.5806884765625, "loss": 10.3452, "nll_loss": 0.6246337890625, "rewards/accuracies": 0.1875, "rewards/chosen": -0.0613250732421875, "rewards/margins": -0.00323486328125, "rewards/rejected": -0.0580902099609375, "step": 79 }, { "epoch": 0.04318779944665632, "grad_norm": 0.20465836771211912, "learning_rate": 1.9928895053775602e-05, "log_odds_chosen": -0.04010009765625, "log_odds_ratio": -0.714599609375, "logits/chosen": -0.13035202026367188, "logits/rejected": 0.0490264892578125, "logps/chosen": -0.46746826171875, "logps/rejected": -0.4423828125, "loss": 9.3362, "nll_loss": 0.47528076171875, "rewards/accuracies": 0.4375, "rewards/chosen": -0.0467529296875, "rewards/margins": -0.00254058837890625, "rewards/rejected": -0.04421234130859375, "step": 80 }, { "epoch": 0.043727646939739524, "grad_norm": 0.18962688286918372, "learning_rate": 1.9926851470739472e-05, "log_odds_chosen": -0.0152587890625, "log_odds_ratio": -0.701904296875, "logits/chosen": -0.0152130126953125, "logits/rejected": 0.08636474609375, "logps/chosen": -0.41778564453125, "logps/rejected": -0.409423828125, "loss": 8.8525, "nll_loss": 0.42279052734375, "rewards/accuracies": 0.375, "rewards/chosen": -0.0417633056640625, "rewards/margins": -0.0008087158203125, "rewards/rejected": -0.04095458984375, "step": 81 }, { "epoch": 0.04426749443282273, "grad_norm": 0.18593012904493839, "learning_rate": 1.9924779043370813e-05, "log_odds_chosen": 0.00885009765625, "log_odds_ratio": -0.6904296875, "logits/chosen": 0.0992431640625, "logits/rejected": 0.23016357421875, "logps/chosen": -0.42755126953125, "logps/rejected": -0.4232177734375, "loss": 9.3657, "nll_loss": 0.434326171875, "rewards/accuracies": 0.4375, "rewards/chosen": -0.04273223876953125, "rewards/margins": -0.00042724609375, "rewards/rejected": -0.04230499267578125, "step": 82 }, { "epoch": 0.04480734192590593, "grad_norm": 0.20896909080208095, "learning_rate": 1.9922677777691454e-05, "log_odds_chosen": 0.05316162109375, "log_odds_ratio": -0.66943359375, "logits/chosen": 0.033599853515625, "logits/rejected": 0.039215087890625, "logps/chosen": -0.45074462890625, "logps/rejected": -0.47705078125, "loss": 9.9404, "nll_loss": 0.45538330078125, "rewards/accuracies": 0.75, "rewards/chosen": -0.04505157470703125, "rewards/margins": 0.00267791748046875, "rewards/rejected": -0.0477294921875, "step": 83 }, { "epoch": 0.04534718941898914, "grad_norm": 0.20996261796573557, "learning_rate": 1.9920547679807016e-05, "log_odds_chosen": -0.0439453125, "log_odds_ratio": -0.7177734375, "logits/chosen": 0.09697914123535156, "logits/rejected": 0.3052825927734375, "logps/chosen": -0.6070556640625, "logps/rejected": -0.5782470703125, "loss": 10.8535, "nll_loss": 0.617919921875, "rewards/accuracies": 0.375, "rewards/chosen": -0.0607147216796875, "rewards/margins": -0.00281524658203125, "rewards/rejected": -0.05789947509765625, "step": 84 }, { "epoch": 0.04588703691207234, "grad_norm": 0.20377681111193263, "learning_rate": 1.9918388755906896e-05, "log_odds_chosen": -0.0001220703125, "log_odds_ratio": -0.693359375, "logits/chosen": 0.25478363037109375, "logits/rejected": 0.32969093322753906, "logps/chosen": -0.4473876953125, "logps/rejected": -0.44384765625, "loss": 9.5845, "nll_loss": 0.4617919921875, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0446929931640625, "rewards/margins": -0.00030517578125, "rewards/rejected": -0.0443878173828125, "step": 85 }, { "epoch": 0.04642688440515554, "grad_norm": 0.1747776115491911, "learning_rate": 1.9916201012264255e-05, "log_odds_chosen": 0.03271484375, "log_odds_ratio": -0.677978515625, "logits/chosen": 0.19503402709960938, "logits/rejected": 0.274200439453125, "logps/chosen": -0.5101318359375, "logps/rejected": -0.5269775390625, "loss": 8.6035, "nll_loss": 0.521240234375, "rewards/accuracies": 0.4375, "rewards/chosen": -0.0509796142578125, "rewards/margins": 0.001708984375, "rewards/rejected": -0.0526885986328125, "step": 86 }, { "epoch": 0.046966731898238745, "grad_norm": 0.22231253438634077, "learning_rate": 1.9913984455235993e-05, "log_odds_chosen": -0.0093994140625, "log_odds_ratio": -0.704833984375, "logits/chosen": 0.1714019775390625, "logits/rejected": 0.17742919921875, "logps/chosen": -0.5833740234375, "logps/rejected": -0.552001953125, "loss": 10.7715, "nll_loss": 0.5816650390625, "rewards/accuracies": 0.6875, "rewards/chosen": -0.05838775634765625, "rewards/margins": -0.00318145751953125, "rewards/rejected": -0.055206298828125, "step": 87 }, { "epoch": 0.047506579391321954, "grad_norm": 0.19564827935407927, "learning_rate": 1.9911739091262733e-05, "log_odds_chosen": 0.0128173828125, "log_odds_ratio": -0.688720703125, "logits/chosen": 0.1427459716796875, "logits/rejected": 0.2492523193359375, "logps/chosen": -0.50732421875, "logps/rejected": -0.505615234375, "loss": 9.3628, "nll_loss": 0.5198974609375, "rewards/accuracies": 0.5625, "rewards/chosen": -0.05077362060546875, "rewards/margins": -0.00022125244140625, "rewards/rejected": -0.0505523681640625, "step": 88 }, { "epoch": 0.048046426884405156, "grad_norm": 0.19732127794778043, "learning_rate": 1.9909464926868806e-05, "log_odds_chosen": 0.12646484375, "log_odds_ratio": -0.63330078125, "logits/chosen": -0.10770988464355469, "logits/rejected": 0.03872871398925781, "logps/chosen": -0.37750244140625, "logps/rejected": -0.41387939453125, "loss": 8.3464, "nll_loss": 0.38232421875, "rewards/accuracies": 0.8125, "rewards/chosen": -0.037750244140625, "rewards/margins": 0.00363922119140625, "rewards/rejected": -0.04138946533203125, "step": 89 }, { "epoch": 0.04858627437748836, "grad_norm": 0.21590252705268023, "learning_rate": 1.9907161968662218e-05, "log_odds_chosen": 0.04119873046875, "log_odds_ratio": -0.674072265625, "logits/chosen": 0.027191162109375, "logits/rejected": 0.02425074577331543, "logps/chosen": -0.449462890625, "logps/rejected": -0.4617919921875, "loss": 9.0757, "nll_loss": 0.457763671875, "rewards/accuracies": 0.5, "rewards/chosen": -0.04498291015625, "rewards/margins": 0.0011749267578125, "rewards/rejected": -0.0461578369140625, "step": 90 }, { "epoch": 0.04912612187057156, "grad_norm": 0.2254603810539787, "learning_rate": 1.9904830223334648e-05, "log_odds_chosen": 0.0133056640625, "log_odds_ratio": -0.687744140625, "logits/chosen": 0.22882080078125, "logits/rejected": 0.3432769775390625, "logps/chosen": -0.53466796875, "logps/rejected": -0.5302734375, "loss": 11.7676, "nll_loss": 0.539794921875, "rewards/accuracies": 0.4375, "rewards/chosen": -0.05344390869140625, "rewards/margins": -0.00041961669921875, "rewards/rejected": -0.0530242919921875, "step": 91 }, { "epoch": 0.04966596936365477, "grad_norm": 0.19341272027489617, "learning_rate": 1.990246969766142e-05, "log_odds_chosen": -0.043701171875, "log_odds_ratio": -0.7177734375, "logits/chosen": 0.18869638442993164, "logits/rejected": 0.33123779296875, "logps/chosen": -0.5328369140625, "logps/rejected": -0.506103515625, "loss": 9.4717, "nll_loss": 0.5457763671875, "rewards/accuracies": 0.375, "rewards/chosen": -0.05328369140625, "rewards/margins": -0.00267791748046875, "rewards/rejected": -0.05060577392578125, "step": 92 }, { "epoch": 0.05020581685673797, "grad_norm": 0.2075766636147665, "learning_rate": 1.990008039850149e-05, "log_odds_chosen": -0.04095458984375, "log_odds_ratio": -0.71484375, "logits/chosen": 0.5907630920410156, "logits/rejected": 0.719024658203125, "logps/chosen": -0.639404296875, "logps/rejected": -0.613037109375, "loss": 10.481, "nll_loss": 0.6510009765625, "rewards/accuracies": 0.25, "rewards/chosen": -0.0638885498046875, "rewards/margins": -0.0026092529296875, "rewards/rejected": -0.061279296875, "step": 93 }, { "epoch": 0.050745664349821175, "grad_norm": 0.20178027898011588, "learning_rate": 1.9897662332797412e-05, "log_odds_chosen": 0.00579833984375, "log_odds_ratio": -0.691650390625, "logits/chosen": 0.0605316162109375, "logits/rejected": 0.189178466796875, "logps/chosen": -0.44525146484375, "logps/rejected": -0.4422607421875, "loss": 10.0337, "nll_loss": 0.4522705078125, "rewards/accuracies": 0.625, "rewards/chosen": -0.04453277587890625, "rewards/margins": -0.0002899169921875, "rewards/rejected": -0.04424285888671875, "step": 94 }, { "epoch": 0.05128551184290438, "grad_norm": 0.19393271630431763, "learning_rate": 1.9895215507575335e-05, "log_odds_chosen": -0.00347900390625, "log_odds_ratio": -0.69482421875, "logits/chosen": 0.27691650390625, "logits/rejected": 0.3831167221069336, "logps/chosen": -0.4661865234375, "logps/rejected": -0.46246337890625, "loss": 8.9541, "nll_loss": 0.4761962890625, "rewards/accuracies": 0.5, "rewards/chosen": -0.046630859375, "rewards/margins": -0.00037384033203125, "rewards/rejected": -0.04625701904296875, "step": 95 }, { "epoch": 0.051825359335987586, "grad_norm": 0.2030730202275363, "learning_rate": 1.989273992994496e-05, "log_odds_chosen": -0.0335693359375, "log_odds_ratio": -0.7109375, "logits/chosen": 0.245025634765625, "logits/rejected": 0.40348052978515625, "logps/chosen": -0.5015869140625, "logps/rejected": -0.483154296875, "loss": 9.6602, "nll_loss": 0.5059814453125, "rewards/accuracies": 0.25, "rewards/chosen": -0.0501556396484375, "rewards/margins": -0.0018463134765625, "rewards/rejected": -0.048309326171875, "step": 96 }, { "epoch": 0.05236520682907079, "grad_norm": 0.1866290513077043, "learning_rate": 1.989023560709955e-05, "log_odds_chosen": 0.0008544921875, "log_odds_ratio": -0.693603515625, "logits/chosen": 0.017364501953125, "logits/rejected": 0.1578826904296875, "logps/chosen": -0.49786376953125, "logps/rejected": -0.4881591796875, "loss": 8.9824, "nll_loss": 0.50299072265625, "rewards/accuracies": 0.5, "rewards/chosen": -0.0497589111328125, "rewards/margins": -0.0009613037109375, "rewards/rejected": -0.048797607421875, "step": 97 }, { "epoch": 0.05290505432215399, "grad_norm": 0.17815257146265467, "learning_rate": 1.988770254631588e-05, "log_odds_chosen": -0.0201416015625, "log_odds_ratio": -0.703369140625, "logits/chosen": 0.268951416015625, "logits/rejected": 0.3707733154296875, "logps/chosen": -0.4716796875, "logps/rejected": -0.4613037109375, "loss": 8.5933, "nll_loss": 0.4774169921875, "rewards/accuracies": 0.3125, "rewards/chosen": -0.0471343994140625, "rewards/margins": -0.0009918212890625, "rewards/rejected": -0.046142578125, "step": 98 }, { "epoch": 0.05344490181523719, "grad_norm": 0.188448423784272, "learning_rate": 1.9885140754954242e-05, "log_odds_chosen": 0.0115966796875, "log_odds_ratio": -0.688232421875, "logits/chosen": 0.262359619140625, "logits/rejected": 0.3729248046875, "logps/chosen": -0.4490966796875, "logps/rejected": -0.44927978515625, "loss": 8.447, "nll_loss": 0.456298828125, "rewards/accuracies": 0.5625, "rewards/chosen": -0.04491424560546875, "rewards/margins": -7.62939453125e-06, "rewards/rejected": -0.0449066162109375, "step": 99 }, { "epoch": 0.0539847493083204, "grad_norm": 0.21518003610644829, "learning_rate": 1.9882550240458398e-05, "log_odds_chosen": -0.0162353515625, "log_odds_ratio": -0.701416015625, "logits/chosen": 0.1912384033203125, "logits/rejected": 0.3233795166015625, "logps/chosen": -0.5194091796875, "logps/rejected": -0.5115966796875, "loss": 9.5903, "nll_loss": 0.5196533203125, "rewards/accuracies": 0.375, "rewards/chosen": -0.05196380615234375, "rewards/margins": -0.00083160400390625, "rewards/rejected": -0.0511322021484375, "step": 100 }, { "epoch": 0.054524596801403605, "grad_norm": 0.18999621030254335, "learning_rate": 1.9879931010355575e-05, "log_odds_chosen": -0.0555419921875, "log_odds_ratio": -0.7236328125, "logits/chosen": 0.39006805419921875, "logits/rejected": 0.5440826416015625, "logps/chosen": -0.60302734375, "logps/rejected": -0.568603515625, "loss": 8.9805, "nll_loss": 0.6068115234375, "rewards/accuracies": 0.4375, "rewards/chosen": -0.060302734375, "rewards/margins": -0.00344085693359375, "rewards/rejected": -0.05686187744140625, "step": 101 }, { "epoch": 0.05506444429448681, "grad_norm": 0.18714368512772225, "learning_rate": 1.9877283072256437e-05, "log_odds_chosen": 0.00628662109375, "log_odds_ratio": -0.69140625, "logits/chosen": 0.040863037109375, "logits/rejected": 0.16302490234375, "logps/chosen": -0.42950439453125, "logps/rejected": -0.425048828125, "loss": 8.396, "nll_loss": 0.4376220703125, "rewards/accuracies": 0.625, "rewards/chosen": -0.0429229736328125, "rewards/margins": -0.00042724609375, "rewards/rejected": -0.0424957275390625, "step": 102 }, { "epoch": 0.05560429178757001, "grad_norm": 0.19082901374567746, "learning_rate": 1.9874606433855067e-05, "log_odds_chosen": 0.03533935546875, "log_odds_ratio": -0.67724609375, "logits/chosen": 0.35040283203125, "logits/rejected": 0.4317626953125, "logps/chosen": -0.44586181640625, "logps/rejected": -0.451171875, "loss": 9.4038, "nll_loss": 0.45562744140625, "rewards/accuracies": 0.4375, "rewards/chosen": -0.04456329345703125, "rewards/margins": 0.00055694580078125, "rewards/rejected": -0.0451202392578125, "step": 103 }, { "epoch": 0.05614413928065322, "grad_norm": 0.21339833000353756, "learning_rate": 1.987190110292894e-05, "log_odds_chosen": -0.103515625, "log_odds_ratio": -0.752197265625, "logits/chosen": -0.03597068786621094, "logits/rejected": 0.17311859130859375, "logps/chosen": -0.5513916015625, "logps/rejected": -0.4923095703125, "loss": 10.272, "nll_loss": 0.5614013671875, "rewards/accuracies": 0.375, "rewards/chosen": -0.05513763427734375, "rewards/margins": -0.0059356689453125, "rewards/rejected": -0.04920196533203125, "step": 104 }, { "epoch": 0.05668398677373642, "grad_norm": 0.20261553291979095, "learning_rate": 1.9869167087338908e-05, "log_odds_chosen": -0.01190185546875, "log_odds_ratio": -0.703125, "logits/chosen": -0.06844711303710938, "logits/rejected": 0.07861328125, "logps/chosen": -0.70880126953125, "logps/rejected": -0.6776123046875, "loss": 10.3276, "nll_loss": 0.70538330078125, "rewards/accuracies": 0.625, "rewards/chosen": -0.07080841064453125, "rewards/margins": -0.00305938720703125, "rewards/rejected": -0.0677490234375, "step": 105 }, { "epoch": 0.05722383426681962, "grad_norm": 0.20614459396982876, "learning_rate": 1.986640439502916e-05, "log_odds_chosen": -0.013671875, "log_odds_ratio": -0.701904296875, "logits/chosen": 0.15636444091796875, "logits/rejected": 0.2320404052734375, "logps/chosen": -0.4881591796875, "logps/rejected": -0.4820556640625, "loss": 9.6123, "nll_loss": 0.4931640625, "rewards/accuracies": 0.3125, "rewards/chosen": -0.0488128662109375, "rewards/margins": -0.000579833984375, "rewards/rejected": -0.0482330322265625, "step": 106 }, { "epoch": 0.057763681759902825, "grad_norm": 0.19889770736940493, "learning_rate": 1.9863613034027224e-05, "log_odds_chosen": -0.00482177734375, "log_odds_ratio": -0.697509765625, "logits/chosen": 0.08072853088378906, "logits/rejected": 0.21373748779296875, "logps/chosen": -0.5277099609375, "logps/rejected": -0.5164794921875, "loss": 9.3999, "nll_loss": 0.5399169921875, "rewards/accuracies": 0.5, "rewards/chosen": -0.0527496337890625, "rewards/margins": -0.0010986328125, "rewards/rejected": -0.0516510009765625, "step": 107 }, { "epoch": 0.058303529252986035, "grad_norm": 0.20485012440509975, "learning_rate": 1.9860793012443922e-05, "log_odds_chosen": -0.03631591796875, "log_odds_ratio": -0.718017578125, "logits/chosen": 0.14657115936279297, "logits/rejected": 0.28720855712890625, "logps/chosen": -0.6546630859375, "logps/rejected": -0.6094970703125, "loss": 10.0547, "nll_loss": 0.6571044921875, "rewards/accuracies": 0.5, "rewards/chosen": -0.0655059814453125, "rewards/margins": -0.00455474853515625, "rewards/rejected": -0.06095123291015625, "step": 108 }, { "epoch": 0.05884337674606924, "grad_norm": 0.22587148976780896, "learning_rate": 1.9857944338473355e-05, "log_odds_chosen": -0.0302734375, "log_odds_ratio": -0.709228515625, "logits/chosen": 0.36576080322265625, "logits/rejected": 0.3531341552734375, "logps/chosen": -0.52392578125, "logps/rejected": -0.51220703125, "loss": 10.8745, "nll_loss": 0.5343017578125, "rewards/accuracies": 0.3125, "rewards/chosen": -0.0524139404296875, "rewards/margins": -0.001190185546875, "rewards/rejected": -0.0512237548828125, "step": 109 }, { "epoch": 0.05938322423915244, "grad_norm": 0.18000367438924872, "learning_rate": 1.985506702039288e-05, "log_odds_chosen": -0.0394287109375, "log_odds_ratio": -0.715087890625, "logits/chosen": 0.18978500366210938, "logits/rejected": 0.367156982421875, "logps/chosen": -0.48681640625, "logps/rejected": -0.4671630859375, "loss": 8.7153, "nll_loss": 0.51416015625, "rewards/accuracies": 0.375, "rewards/chosen": -0.04865264892578125, "rewards/margins": -0.00196075439453125, "rewards/rejected": -0.04669189453125, "step": 110 }, { "epoch": 0.05992307173223564, "grad_norm": 0.20895432758927465, "learning_rate": 1.9852161066563087e-05, "log_odds_chosen": -0.06365966796875, "log_odds_ratio": -0.728271484375, "logits/chosen": 0.2486572265625, "logits/rejected": 0.4338836669921875, "logps/chosen": -0.50177001953125, "logps/rejected": -0.4727783203125, "loss": 9.5786, "nll_loss": 0.5211181640625, "rewards/accuracies": 0.3125, "rewards/chosen": -0.0501708984375, "rewards/margins": -0.002899169921875, "rewards/rejected": -0.047271728515625, "step": 111 }, { "epoch": 0.06046291922531885, "grad_norm": 0.2437111802593175, "learning_rate": 1.984922648542777e-05, "log_odds_chosen": -0.11859130859375, "log_odds_ratio": -0.7626953125, "logits/chosen": 0.353515625, "logits/rejected": 0.47442626953125, "logps/chosen": -0.65924072265625, "logps/rejected": -0.576171875, "loss": 10.3701, "nll_loss": 0.67095947265625, "rewards/accuracies": 0.375, "rewards/chosen": -0.06589508056640625, "rewards/margins": -0.00830078125, "rewards/rejected": -0.05759429931640625, "step": 112 }, { "epoch": 0.06100276671840205, "grad_norm": 0.1972723176948166, "learning_rate": 1.984626328551391e-05, "log_odds_chosen": -0.01800537109375, "log_odds_ratio": -0.702880859375, "logits/chosen": 0.28570556640625, "logits/rejected": 0.4182586669921875, "logps/chosen": -0.5655517578125, "logps/rejected": -0.5501708984375, "loss": 8.7349, "nll_loss": 0.5687255859375, "rewards/accuracies": 0.5, "rewards/chosen": -0.05657196044921875, "rewards/margins": -0.00154876708984375, "rewards/rejected": -0.055023193359375, "step": 113 }, { "epoch": 0.061542614211485255, "grad_norm": 0.20795670074804046, "learning_rate": 1.9843271475431635e-05, "log_odds_chosen": 0.01361083984375, "log_odds_ratio": -0.688232421875, "logits/chosen": 0.27880859375, "logits/rejected": 0.405029296875, "logps/chosen": -0.570556640625, "logps/rejected": -0.5653076171875, "loss": 10.209, "nll_loss": 0.581787109375, "rewards/accuracies": 0.5, "rewards/chosen": -0.057037353515625, "rewards/margins": -0.0004730224609375, "rewards/rejected": -0.0565643310546875, "step": 114 }, { "epoch": 0.06208246170456846, "grad_norm": 0.21408144402427656, "learning_rate": 1.9840251063874216e-05, "log_odds_chosen": -0.0672607421875, "log_odds_ratio": -0.733642578125, "logits/chosen": 0.258087158203125, "logits/rejected": 0.3736076354980469, "logps/chosen": -0.724365234375, "logps/rejected": -0.6640625, "loss": 11.5383, "nll_loss": 0.7215576171875, "rewards/accuracies": 0.5625, "rewards/chosen": -0.072479248046875, "rewards/margins": -0.006072998046875, "rewards/rejected": -0.06640625, "step": 115 }, { "epoch": 0.06262230919765166, "grad_norm": 0.18080930340367815, "learning_rate": 1.9837202059618027e-05, "log_odds_chosen": 0.0426025390625, "log_odds_ratio": -0.673583984375, "logits/chosen": 0.012542724609375, "logits/rejected": 0.11022377014160156, "logps/chosen": -0.5228271484375, "logps/rejected": -0.52685546875, "loss": 8.6128, "nll_loss": 0.52294921875, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0522918701171875, "rewards/margins": 0.00041961669921875, "rewards/rejected": -0.05271148681640625, "step": 116 }, { "epoch": 0.06316215669073487, "grad_norm": 0.2090319448245005, "learning_rate": 1.983412447152252e-05, "log_odds_chosen": 0.0595703125, "log_odds_ratio": -0.666259765625, "logits/chosen": 0.4745521545410156, "logits/rejected": 0.624664306640625, "logps/chosen": -0.5357666015625, "logps/rejected": -0.5477294921875, "loss": 10.395, "nll_loss": 0.54083251953125, "rewards/accuracies": 0.625, "rewards/chosen": -0.0535736083984375, "rewards/margins": 0.0011749267578125, "rewards/rejected": -0.05474853515625, "step": 117 }, { "epoch": 0.06370200418381806, "grad_norm": 0.19724268891907532, "learning_rate": 1.9831018308530202e-05, "log_odds_chosen": 0.040771484375, "log_odds_ratio": -0.67431640625, "logits/chosen": 0.26275634765625, "logits/rejected": 0.247039794921875, "logps/chosen": -0.448486328125, "logps/rejected": -0.461181640625, "loss": 9.249, "nll_loss": 0.4591064453125, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0448455810546875, "rewards/margins": 0.001251220703125, "rewards/rejected": -0.0460968017578125, "step": 118 }, { "epoch": 0.06424185167690127, "grad_norm": 0.2220754388769577, "learning_rate": 1.9827883579666622e-05, "log_odds_chosen": 0.0257568359375, "log_odds_ratio": -0.6826171875, "logits/chosen": 0.3352937698364258, "logits/rejected": 0.3861541748046875, "logps/chosen": -0.47314453125, "logps/rejected": -0.4779052734375, "loss": 10.0288, "nll_loss": 0.4840087890625, "rewards/accuracies": 0.5625, "rewards/chosen": -0.04730987548828125, "rewards/margins": 0.00046539306640625, "rewards/rejected": -0.0477752685546875, "step": 119 }, { "epoch": 0.06478169916998448, "grad_norm": 0.2098177857335871, "learning_rate": 1.982472029404032e-05, "log_odds_chosen": -0.0162353515625, "log_odds_ratio": -0.70263671875, "logits/chosen": 0.22709274291992188, "logits/rejected": 0.445645809173584, "logps/chosen": -0.675048828125, "logps/rejected": -0.6544189453125, "loss": 9.7683, "nll_loss": 0.6832275390625, "rewards/accuracies": 0.4375, "rewards/chosen": -0.0674591064453125, "rewards/margins": -0.00199127197265625, "rewards/rejected": -0.06546783447265625, "step": 120 }, { "epoch": 0.06532154666306768, "grad_norm": 0.19633178026160952, "learning_rate": 1.9821528460842813e-05, "log_odds_chosen": -0.01190185546875, "log_odds_ratio": -0.701904296875, "logits/chosen": 0.35858821868896484, "logits/rejected": 0.48638916015625, "logps/chosen": -0.50592041015625, "logps/rejected": -0.4954833984375, "loss": 9.772, "nll_loss": 0.51629638671875, "rewards/accuracies": 0.375, "rewards/chosen": -0.0505828857421875, "rewards/margins": -0.00101470947265625, "rewards/rejected": -0.04956817626953125, "step": 121 }, { "epoch": 0.06586139415615089, "grad_norm": 0.21659945975823597, "learning_rate": 1.9818308089348577e-05, "log_odds_chosen": 0.0303955078125, "log_odds_ratio": -0.6806640625, "logits/chosen": 0.2641258239746094, "logits/rejected": 0.43096160888671875, "logps/chosen": -0.568115234375, "logps/rejected": -0.5697021484375, "loss": 8.9546, "nll_loss": 0.5838623046875, "rewards/accuracies": 0.5625, "rewards/chosen": -0.056793212890625, "rewards/margins": 0.0001678466796875, "rewards/rejected": -0.0569610595703125, "step": 122 }, { "epoch": 0.0664012416492341, "grad_norm": 0.19116221144317683, "learning_rate": 1.9815059188915006e-05, "log_odds_chosen": -0.0238037109375, "log_odds_ratio": -0.706298828125, "logits/chosen": 0.1784515380859375, "logits/rejected": 0.28107452392578125, "logps/chosen": -0.5404052734375, "logps/rejected": -0.521240234375, "loss": 9.6187, "nll_loss": 0.5467529296875, "rewards/accuracies": 0.4375, "rewards/chosen": -0.05410003662109375, "rewards/margins": -0.001983642578125, "rewards/rejected": -0.05211639404296875, "step": 123 }, { "epoch": 0.06694108914231729, "grad_norm": 0.21773760851516685, "learning_rate": 1.9811781768982392e-05, "log_odds_chosen": 0.04498291015625, "log_odds_ratio": -0.673583984375, "logits/chosen": 0.20767974853515625, "logits/rejected": 0.3819389343261719, "logps/chosen": -0.53375244140625, "logps/rejected": -0.5341796875, "loss": 10.2256, "nll_loss": 0.5426025390625, "rewards/accuracies": 0.8125, "rewards/chosen": -0.0533905029296875, "rewards/margins": 5.340576171875e-05, "rewards/rejected": -0.05344390869140625, "step": 124 }, { "epoch": 0.0674809366354005, "grad_norm": 0.2045007318374, "learning_rate": 1.980847583907389e-05, "log_odds_chosen": 0.04248046875, "log_odds_ratio": -0.673095703125, "logits/chosen": 0.24004364013671875, "logits/rejected": 0.3698692321777344, "logps/chosen": -0.4747314453125, "logps/rejected": -0.4874267578125, "loss": 9.0728, "nll_loss": 0.486328125, "rewards/accuracies": 0.625, "rewards/chosen": -0.0474853515625, "rewards/margins": 0.00128173828125, "rewards/rejected": -0.04876708984375, "step": 125 }, { "epoch": 0.0680207841284837, "grad_norm": 0.200953398636224, "learning_rate": 1.9805141408795505e-05, "log_odds_chosen": 0.0062255859375, "log_odds_ratio": -0.691650390625, "logits/chosen": 0.30803680419921875, "logits/rejected": 0.41500091552734375, "logps/chosen": -0.502685546875, "logps/rejected": -0.5006103515625, "loss": 9.52, "nll_loss": 0.529052734375, "rewards/accuracies": 0.3125, "rewards/chosen": -0.05027008056640625, "rewards/margins": -0.00026702880859375, "rewards/rejected": -0.0500030517578125, "step": 126 }, { "epoch": 0.0685606316215669, "grad_norm": 0.2014744727345127, "learning_rate": 1.9801778487836046e-05, "log_odds_chosen": 0.1629638671875, "log_odds_ratio": -0.617431640625, "logits/chosen": 0.057891845703125, "logits/rejected": 0.114288330078125, "logps/chosen": -0.3709716796875, "logps/rejected": -0.419921875, "loss": 9.6948, "nll_loss": 0.37786865234375, "rewards/accuracies": 0.875, "rewards/chosen": -0.0370941162109375, "rewards/margins": 0.004913330078125, "rewards/rejected": -0.0420074462890625, "step": 127 }, { "epoch": 0.06910047911465012, "grad_norm": 0.1980775497596348, "learning_rate": 1.979838708596712e-05, "log_odds_chosen": 0.1053466796875, "log_odds_ratio": -0.64501953125, "logits/chosen": 0.1241455078125, "logits/rejected": 0.23374557495117188, "logps/chosen": -0.47723388671875, "logps/rejected": -0.508056640625, "loss": 9.4697, "nll_loss": 0.47607421875, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0477294921875, "rewards/margins": 0.00310516357421875, "rewards/rejected": -0.05083465576171875, "step": 128 }, { "epoch": 0.06964032660773331, "grad_norm": 0.19668085677574876, "learning_rate": 1.9794967213043073e-05, "log_odds_chosen": 0.016845703125, "log_odds_ratio": -0.68798828125, "logits/chosen": -0.11598968505859375, "logits/rejected": -0.01140594482421875, "logps/chosen": -0.5157470703125, "logps/rejected": -0.5064697265625, "loss": 9.9165, "nll_loss": 0.512451171875, "rewards/accuracies": 0.6875, "rewards/chosen": -0.05159759521484375, "rewards/margins": -0.0009765625, "rewards/rejected": -0.05062103271484375, "step": 129 }, { "epoch": 0.07018017410081652, "grad_norm": 0.20781101176005565, "learning_rate": 1.9791518879000995e-05, "log_odds_chosen": -0.0003662109375, "log_odds_ratio": -0.70166015625, "logits/chosen": 0.089385986328125, "logits/rejected": 0.21484375, "logps/chosen": -0.6231689453125, "logps/rejected": -0.584228515625, "loss": 9.6182, "nll_loss": 0.62469482421875, "rewards/accuracies": 0.625, "rewards/chosen": -0.06232452392578125, "rewards/margins": -0.00390625, "rewards/rejected": -0.05841827392578125, "step": 130 }, { "epoch": 0.07072002159389973, "grad_norm": 0.19251407393788653, "learning_rate": 1.9788042093860665e-05, "log_odds_chosen": 0.14599609375, "log_odds_ratio": -0.6258544921875, "logits/chosen": 0.07324409484863281, "logits/rejected": 0.1742095947265625, "logps/chosen": -0.4007568359375, "logps/rejected": -0.4483642578125, "loss": 8.8574, "nll_loss": 0.4012451171875, "rewards/accuracies": 0.875, "rewards/chosen": -0.040069580078125, "rewards/margins": 0.0047760009765625, "rewards/rejected": -0.0448455810546875, "step": 131 }, { "epoch": 0.07125986908698292, "grad_norm": 0.20830703116901278, "learning_rate": 1.9784536867724538e-05, "log_odds_chosen": -0.0074462890625, "log_odds_ratio": -0.7001953125, "logits/chosen": 0.4496002197265625, "logits/rejected": 0.4993782043457031, "logps/chosen": -0.5745849609375, "logps/rejected": -0.5650634765625, "loss": 9.8521, "nll_loss": 0.5860595703125, "rewards/accuracies": 0.5, "rewards/chosen": -0.0574493408203125, "rewards/margins": -0.00093841552734375, "rewards/rejected": -0.05651092529296875, "step": 132 }, { "epoch": 0.07179971658006613, "grad_norm": 0.20714747713586054, "learning_rate": 1.978100321077771e-05, "log_odds_chosen": 0.1085205078125, "log_odds_ratio": -0.64404296875, "logits/chosen": 0.12827301025390625, "logits/rejected": 0.19061279296875, "logps/chosen": -0.50421142578125, "logps/rejected": -0.5355224609375, "loss": 9.3887, "nll_loss": 0.52276611328125, "rewards/accuracies": 0.6875, "rewards/chosen": -0.05042266845703125, "rewards/margins": 0.0030975341796875, "rewards/rejected": -0.05352020263671875, "step": 133 }, { "epoch": 0.07233956407314933, "grad_norm": 0.2608390298713881, "learning_rate": 1.9777441133287875e-05, "log_odds_chosen": 0.1463623046875, "log_odds_ratio": -0.63232421875, "logits/chosen": 0.18006515502929688, "logits/rejected": 0.21509552001953125, "logps/chosen": -0.50653076171875, "logps/rejected": -0.551513671875, "loss": 9.0073, "nll_loss": 0.506591796875, "rewards/accuracies": 0.625, "rewards/chosen": -0.0506134033203125, "rewards/margins": 0.00457763671875, "rewards/rejected": -0.0551910400390625, "step": 134 }, { "epoch": 0.07287941156623254, "grad_norm": 0.22636716131425105, "learning_rate": 1.9773850645605328e-05, "log_odds_chosen": 0.046875, "log_odds_ratio": -0.676025390625, "logits/chosen": 0.24786376953125, "logits/rejected": 0.339691162109375, "logps/chosen": -0.45989990234375, "logps/rejected": -0.4674072265625, "loss": 9.7959, "nll_loss": 0.4808349609375, "rewards/accuracies": 0.5625, "rewards/chosen": -0.04596710205078125, "rewards/margins": 0.00081634521484375, "rewards/rejected": -0.046783447265625, "step": 135 }, { "epoch": 0.07341925905931575, "grad_norm": 0.1904783437462399, "learning_rate": 1.97702317581629e-05, "log_odds_chosen": 0.147705078125, "log_odds_ratio": -0.6270751953125, "logits/chosen": 0.2494354248046875, "logits/rejected": 0.34175872802734375, "logps/chosen": -0.49810791015625, "logps/rejected": -0.546142578125, "loss": 8.8301, "nll_loss": 0.51763916015625, "rewards/accuracies": 0.75, "rewards/chosen": -0.0498199462890625, "rewards/margins": 0.0047454833984375, "rewards/rejected": -0.0545654296875, "step": 136 }, { "epoch": 0.07395910655239894, "grad_norm": 0.21221307713952764, "learning_rate": 1.9766584481475958e-05, "log_odds_chosen": 0.0367431640625, "log_odds_ratio": -0.6798095703125, "logits/chosen": 0.38317108154296875, "logits/rejected": 0.4044761657714844, "logps/chosen": -0.6263427734375, "logps/rejected": -0.6533203125, "loss": 8.9937, "nll_loss": 0.629150390625, "rewards/accuracies": 0.5, "rewards/chosen": -0.06269073486328125, "rewards/margins": 0.002655029296875, "rewards/rejected": -0.06534576416015625, "step": 137 }, { "epoch": 0.07449895404548215, "grad_norm": 0.24079510307765253, "learning_rate": 1.9762908826142342e-05, "log_odds_chosen": 0.0875244140625, "log_odds_ratio": -0.65673828125, "logits/chosen": 0.04988929629325867, "logits/rejected": 0.1737060546875, "logps/chosen": -0.545166015625, "logps/rejected": -0.57421875, "loss": 10.9121, "nll_loss": 0.552001953125, "rewards/accuracies": 0.75, "rewards/chosen": -0.05452728271484375, "rewards/margins": 0.00286102294921875, "rewards/rejected": -0.0573883056640625, "step": 138 }, { "epoch": 0.07503880153856536, "grad_norm": 0.19837687728758052, "learning_rate": 1.975920480284236e-05, "log_odds_chosen": 0.1219482421875, "log_odds_ratio": -0.6419677734375, "logits/chosen": 0.0417938232421875, "logits/rejected": 0.13568496704101562, "logps/chosen": -0.49761962890625, "logps/rejected": -0.5145263671875, "loss": 8.896, "nll_loss": 0.500732421875, "rewards/accuracies": 0.6875, "rewards/chosen": -0.04979705810546875, "rewards/margins": 0.00162506103515625, "rewards/rejected": -0.051422119140625, "step": 139 }, { "epoch": 0.07557864903164856, "grad_norm": 0.18584891276838927, "learning_rate": 1.9755472422338746e-05, "log_odds_chosen": 0.1165771484375, "log_odds_ratio": -0.64697265625, "logits/chosen": -0.0705718994140625, "logits/rejected": -0.05527305603027344, "logps/chosen": -0.5067138671875, "logps/rejected": -0.52691650390625, "loss": 8.7007, "nll_loss": 0.508056640625, "rewards/accuracies": 0.75, "rewards/chosen": -0.0506591796875, "rewards/margins": 0.00201416015625, "rewards/rejected": -0.05267333984375, "step": 140 }, { "epoch": 0.07611849652473177, "grad_norm": 0.19202392759248632, "learning_rate": 1.9751711695476638e-05, "log_odds_chosen": 0.07373046875, "log_odds_ratio": -0.6591796875, "logits/chosen": 0.33634185791015625, "logits/rejected": 0.4078521728515625, "logps/chosen": -0.4786376953125, "logps/rejected": -0.5035400390625, "loss": 9.3655, "nll_loss": 0.503662109375, "rewards/accuracies": 0.625, "rewards/chosen": -0.04787445068359375, "rewards/margins": 0.00247955322265625, "rewards/rejected": -0.05035400390625, "step": 141 }, { "epoch": 0.07665834401781496, "grad_norm": 0.19690785033787944, "learning_rate": 1.9747922633183533e-05, "log_odds_chosen": 0.091796875, "log_odds_ratio": -0.6524658203125, "logits/chosen": 0.0783233642578125, "logits/rejected": 0.2034149169921875, "logps/chosen": -0.44476318359375, "logps/rejected": -0.4705810546875, "loss": 9.0273, "nll_loss": 0.45989990234375, "rewards/accuracies": 0.625, "rewards/chosen": -0.044464111328125, "rewards/margins": 0.0025787353515625, "rewards/rejected": -0.0470428466796875, "step": 142 }, { "epoch": 0.07719819151089817, "grad_norm": 0.22750069857817773, "learning_rate": 1.9744105246469264e-05, "log_odds_chosen": 0.11065673828125, "log_odds_ratio": -0.644287109375, "logits/chosen": -0.0286865234375, "logits/rejected": 0.1756591796875, "logps/chosen": -0.59881591796875, "logps/rejected": -0.61865234375, "loss": 9.937, "nll_loss": 0.60235595703125, "rewards/accuracies": 0.8125, "rewards/chosen": -0.05989837646484375, "rewards/margins": 0.00194549560546875, "rewards/rejected": -0.0618438720703125, "step": 143 }, { "epoch": 0.07773803900398138, "grad_norm": 0.21038916881553352, "learning_rate": 1.9740259546425958e-05, "log_odds_chosen": 0.0611572265625, "log_odds_ratio": -0.667236328125, "logits/chosen": 0.14349365234375, "logits/rejected": 0.2569580078125, "logps/chosen": -0.585205078125, "logps/rejected": -0.580078125, "loss": 9.0811, "nll_loss": 0.5869140625, "rewards/accuracies": 0.625, "rewards/chosen": -0.0585174560546875, "rewards/margins": -0.000518798828125, "rewards/rejected": -0.0579986572265625, "step": 144 }, { "epoch": 0.07827788649706457, "grad_norm": 0.21139362793274277, "learning_rate": 1.9736385544228032e-05, "log_odds_chosen": 0.0885009765625, "log_odds_ratio": -0.651611328125, "logits/chosen": 0.16295623779296875, "logits/rejected": 0.27801513671875, "logps/chosen": -0.4730224609375, "logps/rejected": -0.50537109375, "loss": 9.1362, "nll_loss": 0.48638916015625, "rewards/accuracies": 0.75, "rewards/chosen": -0.047271728515625, "rewards/margins": 0.0032806396484375, "rewards/rejected": -0.0505523681640625, "step": 145 }, { "epoch": 0.07881773399014778, "grad_norm": 0.18084051300310902, "learning_rate": 1.973248325113212e-05, "log_odds_chosen": 0.029052734375, "log_odds_ratio": -0.679931640625, "logits/chosen": 0.6653022766113281, "logits/rejected": 0.688178539276123, "logps/chosen": -0.555419921875, "logps/rejected": -0.56396484375, "loss": 9.3682, "nll_loss": 0.5623779296875, "rewards/accuracies": 0.4375, "rewards/chosen": -0.0555267333984375, "rewards/margins": 0.000885009765625, "rewards/rejected": -0.0564117431640625, "step": 146 }, { "epoch": 0.079357581483231, "grad_norm": 0.18110948865440443, "learning_rate": 1.972855267847707e-05, "log_odds_chosen": 0.05712890625, "log_odds_ratio": -0.666015625, "logits/chosen": 0.2546539306640625, "logits/rejected": 0.3619842529296875, "logps/chosen": -0.4346923828125, "logps/rejected": -0.4520263671875, "loss": 9.3169, "nll_loss": 0.4407958984375, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0435028076171875, "rewards/margins": 0.00170135498046875, "rewards/rejected": -0.04520416259765625, "step": 147 }, { "epoch": 0.07989742897631419, "grad_norm": 0.19065457572744396, "learning_rate": 1.9724593837683894e-05, "log_odds_chosen": 0.05908203125, "log_odds_ratio": -0.666259765625, "logits/chosen": 0.48409271240234375, "logits/rejected": 0.5571365356445312, "logps/chosen": -0.52264404296875, "logps/rejected": -0.5384521484375, "loss": 10.0132, "nll_loss": 0.54144287109375, "rewards/accuracies": 0.6875, "rewards/chosen": -0.052276611328125, "rewards/margins": 0.00156402587890625, "rewards/rejected": -0.05384063720703125, "step": 148 }, { "epoch": 0.0804372764693974, "grad_norm": 0.19504629827638117, "learning_rate": 1.9720606740255756e-05, "log_odds_chosen": 0.074462890625, "log_odds_ratio": -0.658203125, "logits/chosen": 0.3884429931640625, "logits/rejected": 0.49857330322265625, "logps/chosen": -0.4842529296875, "logps/rejected": -0.5091552734375, "loss": 9.2134, "nll_loss": 0.4951171875, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0484161376953125, "rewards/margins": 0.0025177001953125, "rewards/rejected": -0.050933837890625, "step": 149 }, { "epoch": 0.0809771239624806, "grad_norm": 0.16814653318753667, "learning_rate": 1.971659139777791e-05, "log_odds_chosen": 0.08392333984375, "log_odds_ratio": -0.65478515625, "logits/chosen": 0.178680419921875, "logits/rejected": 0.2325897216796875, "logps/chosen": -0.43829345703125, "logps/rejected": -0.458740234375, "loss": 8.6636, "nll_loss": 0.44549560546875, "rewards/accuracies": 0.8125, "rewards/chosen": -0.04383087158203125, "rewards/margins": 0.002044677734375, "rewards/rejected": -0.04587554931640625, "step": 150 }, { "epoch": 0.0815169714555638, "grad_norm": 0.21137913435657552, "learning_rate": 1.9712547821917695e-05, "log_odds_chosen": 0.11004638671875, "log_odds_ratio": -0.6436767578125, "logits/chosen": -0.007843017578125, "logits/rejected": 0.1061859130859375, "logps/chosen": -0.559326171875, "logps/rejected": -0.5919189453125, "loss": 9.4814, "nll_loss": 0.5694580078125, "rewards/accuracies": 0.6875, "rewards/chosen": -0.05591583251953125, "rewards/margins": 0.00322723388671875, "rewards/rejected": -0.05914306640625, "step": 151 }, { "epoch": 0.08205681894864701, "grad_norm": 0.17989096131643134, "learning_rate": 1.9708476024424477e-05, "log_odds_chosen": 0.0518798828125, "log_odds_ratio": -0.670166015625, "logits/chosen": 0.389984130859375, "logits/rejected": 0.48822975158691406, "logps/chosen": -0.50128173828125, "logps/rejected": -0.5145263671875, "loss": 8.9844, "nll_loss": 0.505859375, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0501556396484375, "rewards/margins": 0.00128173828125, "rewards/rejected": -0.0514373779296875, "step": 152 }, { "epoch": 0.08259666644173021, "grad_norm": 0.18341649624048614, "learning_rate": 1.9704376017129627e-05, "log_odds_chosen": 0.0552978515625, "log_odds_ratio": -0.66748046875, "logits/chosen": 0.43255615234375, "logits/rejected": 0.58380126953125, "logps/chosen": -0.572265625, "logps/rejected": -0.5860595703125, "loss": 9.2451, "nll_loss": 0.5821533203125, "rewards/accuracies": 0.625, "rewards/chosen": -0.057220458984375, "rewards/margins": 0.001373291015625, "rewards/rejected": -0.05859375, "step": 153 }, { "epoch": 0.08313651393481342, "grad_norm": 0.1748759724151042, "learning_rate": 1.970024781194649e-05, "log_odds_chosen": 0.080322265625, "log_odds_ratio": -0.65576171875, "logits/chosen": 0.2247142791748047, "logits/rejected": 0.3358154296875, "logps/chosen": -0.478515625, "logps/rejected": -0.501953125, "loss": 8.7754, "nll_loss": 0.4876708984375, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0478668212890625, "rewards/margins": 0.00232696533203125, "rewards/rejected": -0.05019378662109375, "step": 154 }, { "epoch": 0.08367636142789663, "grad_norm": 0.187733870403223, "learning_rate": 1.969609142087034e-05, "log_odds_chosen": 0.0894775390625, "log_odds_ratio": -0.65234375, "logits/chosen": 0.368377685546875, "logits/rejected": 0.4636383056640625, "logps/chosen": -0.47998046875, "logps/rejected": -0.5059814453125, "loss": 8.6191, "nll_loss": 0.489501953125, "rewards/accuracies": 0.6875, "rewards/chosen": -0.04803466796875, "rewards/margins": 0.00255584716796875, "rewards/rejected": -0.05059051513671875, "step": 155 }, { "epoch": 0.08421620892097982, "grad_norm": 0.2025891368636923, "learning_rate": 1.9691906855978354e-05, "log_odds_chosen": 0.051513671875, "log_odds_ratio": -0.673583984375, "logits/chosen": 0.3851184844970703, "logits/rejected": 0.5178890228271484, "logps/chosen": -0.620361328125, "logps/rejected": -0.615966796875, "loss": 9.6416, "nll_loss": 0.623046875, "rewards/accuracies": 0.5625, "rewards/chosen": -0.06207275390625, "rewards/margins": -0.00048828125, "rewards/rejected": -0.06158447265625, "step": 156 }, { "epoch": 0.08475605641406303, "grad_norm": 0.1984475896222435, "learning_rate": 1.968769412942957e-05, "log_odds_chosen": 0.15576171875, "log_odds_ratio": -0.6234130859375, "logits/chosen": 0.08596992492675781, "logits/rejected": 0.1866912841796875, "logps/chosen": -0.4732666015625, "logps/rejected": -0.5177001953125, "loss": 9.1909, "nll_loss": 0.48028564453125, "rewards/accuracies": 0.625, "rewards/chosen": -0.0473175048828125, "rewards/margins": 0.004486083984375, "rewards/rejected": -0.0518035888671875, "step": 157 }, { "epoch": 0.08529590390714623, "grad_norm": 0.1985133605800331, "learning_rate": 1.968345325346486e-05, "log_odds_chosen": -0.0029296875, "log_odds_ratio": -0.706298828125, "logits/chosen": 0.32037925720214844, "logits/rejected": 0.42034912109375, "logps/chosen": -0.67236328125, "logps/rejected": -0.63525390625, "loss": 9.3452, "nll_loss": 0.6861572265625, "rewards/accuracies": 0.4375, "rewards/chosen": -0.0672607421875, "rewards/margins": -0.0037078857421875, "rewards/rejected": -0.0635528564453125, "step": 158 }, { "epoch": 0.08583575140022943, "grad_norm": 0.21435160540387715, "learning_rate": 1.9679184240406882e-05, "log_odds_chosen": 0.141845703125, "log_odds_ratio": -0.630126953125, "logits/chosen": 0.063323974609375, "logits/rejected": 0.211090087890625, "logps/chosen": -0.55987548828125, "logps/rejected": -0.60107421875, "loss": 9.3667, "nll_loss": 0.55804443359375, "rewards/accuracies": 0.6875, "rewards/chosen": -0.05594635009765625, "rewards/margins": 0.00412750244140625, "rewards/rejected": -0.0600738525390625, "step": 159 }, { "epoch": 0.08637559889331264, "grad_norm": 0.20356198134671286, "learning_rate": 1.9674887102660052e-05, "log_odds_chosen": 0.11083984375, "log_odds_ratio": -0.64501953125, "logits/chosen": 0.2469635009765625, "logits/rejected": 0.28636741638183594, "logps/chosen": -0.4649658203125, "logps/rejected": -0.5028076171875, "loss": 10.8203, "nll_loss": 0.4691162109375, "rewards/accuracies": 0.625, "rewards/chosen": -0.04650115966796875, "rewards/margins": 0.00377655029296875, "rewards/rejected": -0.0502777099609375, "step": 160 }, { "epoch": 0.08691544638639584, "grad_norm": 0.19845157751880707, "learning_rate": 1.9670561852710522e-05, "log_odds_chosen": 0.09912109375, "log_odds_ratio": -0.65673828125, "logits/chosen": 0.24977970123291016, "logits/rejected": 0.2294921875, "logps/chosen": -0.5550537109375, "logps/rejected": -0.57720947265625, "loss": 8.9346, "nll_loss": 0.5655517578125, "rewards/accuracies": 0.75, "rewards/chosen": -0.0555267333984375, "rewards/margins": 0.002197265625, "rewards/rejected": -0.0577239990234375, "step": 161 }, { "epoch": 0.08745529387947905, "grad_norm": 0.27460246363517804, "learning_rate": 1.9666208503126115e-05, "log_odds_chosen": 0.043701171875, "log_odds_ratio": -0.67822265625, "logits/chosen": 0.3623199462890625, "logits/rejected": 0.4366455078125, "logps/chosen": -0.551513671875, "logps/rejected": -0.5684814453125, "loss": 11.1187, "nll_loss": 0.569091796875, "rewards/accuracies": 0.6875, "rewards/chosen": -0.055145263671875, "rewards/margins": 0.001708984375, "rewards/rejected": -0.056854248046875, "step": 162 }, { "epoch": 0.08799514137256226, "grad_norm": 0.20448849893290005, "learning_rate": 1.96618270665563e-05, "log_odds_chosen": 0.156005859375, "log_odds_ratio": -0.621826171875, "logits/chosen": 0.5023231506347656, "logits/rejected": 0.5023307800292969, "logps/chosen": -0.5386962890625, "logps/rejected": -0.6025390625, "loss": 9.457, "nll_loss": 0.541748046875, "rewards/accuracies": 0.875, "rewards/chosen": -0.0538787841796875, "rewards/margins": 0.006378173828125, "rewards/rejected": -0.0602569580078125, "step": 163 }, { "epoch": 0.08853498886564545, "grad_norm": 0.22996393355431613, "learning_rate": 1.965741755573217e-05, "log_odds_chosen": 0.123046875, "log_odds_ratio": -0.64013671875, "logits/chosen": 0.22296142578125, "logits/rejected": 0.25337982177734375, "logps/chosen": -0.5113525390625, "logps/rejected": -0.5518798828125, "loss": 9.396, "nll_loss": 0.5159912109375, "rewards/accuracies": 0.6875, "rewards/chosen": -0.05109405517578125, "rewards/margins": 0.00405120849609375, "rewards/rejected": -0.055145263671875, "step": 164 }, { "epoch": 0.08907483635872866, "grad_norm": 0.21447310386688176, "learning_rate": 1.965297998346639e-05, "log_odds_chosen": 0.122314453125, "log_odds_ratio": -0.638671875, "logits/chosen": 0.35530853271484375, "logits/rejected": 0.4766998291015625, "logps/chosen": -0.462890625, "logps/rejected": -0.5003662109375, "loss": 9.2271, "nll_loss": 0.483154296875, "rewards/accuracies": 0.75, "rewards/chosen": -0.046295166015625, "rewards/margins": 0.0037078857421875, "rewards/rejected": -0.0500030517578125, "step": 165 }, { "epoch": 0.08961468385181186, "grad_norm": 0.24199764504954704, "learning_rate": 1.964851436265315e-05, "log_odds_chosen": -0.0032958984375, "log_odds_ratio": -0.69873046875, "logits/chosen": 0.4074134826660156, "logits/rejected": 0.491241455078125, "logps/chosen": -0.6729736328125, "logps/rejected": -0.6666259765625, "loss": 10.5356, "nll_loss": 0.68115234375, "rewards/accuracies": 0.5, "rewards/chosen": -0.06732177734375, "rewards/margins": -0.00070953369140625, "rewards/rejected": -0.06661224365234375, "step": 166 }, { "epoch": 0.09015453134489507, "grad_norm": 0.21757824521044145, "learning_rate": 1.9644020706268162e-05, "log_odds_chosen": -0.0523681640625, "log_odds_ratio": -0.7423095703125, "logits/chosen": 0.0210113525390625, "logits/rejected": 0.139404296875, "logps/chosen": -0.74798583984375, "logps/rejected": -0.648193359375, "loss": 9.7485, "nll_loss": 0.75115966796875, "rewards/accuracies": 0.5625, "rewards/chosen": -0.07476806640625, "rewards/margins": -0.0098876953125, "rewards/rejected": -0.06488037109375, "step": 167 }, { "epoch": 0.09069437883797828, "grad_norm": 0.20485594223838963, "learning_rate": 1.963949902736858e-05, "log_odds_chosen": 0.023681640625, "log_odds_ratio": -0.68408203125, "logits/chosen": 0.361907958984375, "logits/rejected": 0.444305419921875, "logps/chosen": -0.47381591796875, "logps/rejected": -0.4774169921875, "loss": 9.314, "nll_loss": 0.4742431640625, "rewards/accuracies": 0.5625, "rewards/chosen": -0.04740142822265625, "rewards/margins": 0.0003509521484375, "rewards/rejected": -0.04775238037109375, "step": 168 }, { "epoch": 0.09123422633106147, "grad_norm": 0.196723293192322, "learning_rate": 1.9634949339093003e-05, "log_odds_chosen": 0.0157470703125, "log_odds_ratio": -0.69091796875, "logits/chosen": 0.30428314208984375, "logits/rejected": 0.37811279296875, "logps/chosen": -0.51336669921875, "logps/rejected": -0.505126953125, "loss": 9.488, "nll_loss": 0.5186767578125, "rewards/accuracies": 0.5, "rewards/chosen": -0.0513153076171875, "rewards/margins": -0.0008087158203125, "rewards/rejected": -0.050506591796875, "step": 169 }, { "epoch": 0.09177407382414468, "grad_norm": 0.19773992883088393, "learning_rate": 1.9630371654661392e-05, "log_odds_chosen": 0.065185546875, "log_odds_ratio": -0.66650390625, "logits/chosen": 0.256103515625, "logits/rejected": 0.40711212158203125, "logps/chosen": -0.55572509765625, "logps/rejected": -0.560546875, "loss": 9.4502, "nll_loss": 0.5592041015625, "rewards/accuracies": 0.625, "rewards/chosen": -0.05554962158203125, "rewards/margins": 0.00052642822265625, "rewards/rejected": -0.0560760498046875, "step": 170 }, { "epoch": 0.09231392131722789, "grad_norm": 0.2068247102356077, "learning_rate": 1.9625765987375077e-05, "log_odds_chosen": -0.0296630859375, "log_odds_ratio": -0.717041015625, "logits/chosen": -0.00067901611328125, "logits/rejected": 0.18144607543945312, "logps/chosen": -0.5291748046875, "logps/rejected": -0.486572265625, "loss": 9.8901, "nll_loss": 0.52978515625, "rewards/accuracies": 0.6875, "rewards/chosen": -0.05292510986328125, "rewards/margins": -0.00426483154296875, "rewards/rejected": -0.0486602783203125, "step": 171 }, { "epoch": 0.09285376881031109, "grad_norm": 0.2053658428345874, "learning_rate": 1.962113235061669e-05, "log_odds_chosen": 0.0521240234375, "log_odds_ratio": -0.671142578125, "logits/chosen": 0.15933609008789062, "logits/rejected": 0.3241310119628906, "logps/chosen": -0.5169677734375, "logps/rejected": -0.5185546875, "loss": 9.3862, "nll_loss": 0.5386962890625, "rewards/accuracies": 0.6875, "rewards/chosen": -0.05168914794921875, "rewards/margins": 0.00017547607421875, "rewards/rejected": -0.0518646240234375, "step": 172 }, { "epoch": 0.0933936163033943, "grad_norm": 0.20972540279617982, "learning_rate": 1.961647075785013e-05, "log_odds_chosen": -0.0211181640625, "log_odds_ratio": -0.707275390625, "logits/chosen": 0.136444091796875, "logits/rejected": 0.3438072204589844, "logps/chosen": -0.5755615234375, "logps/rejected": -0.550537109375, "loss": 9.7461, "nll_loss": 0.5870361328125, "rewards/accuracies": 0.625, "rewards/chosen": -0.05751800537109375, "rewards/margins": -0.0024261474609375, "rewards/rejected": -0.05509185791015625, "step": 173 }, { "epoch": 0.09393346379647749, "grad_norm": 0.20248933759914342, "learning_rate": 1.9611781222620535e-05, "log_odds_chosen": 0.05596923828125, "log_odds_ratio": -0.6669921875, "logits/chosen": 0.1511530876159668, "logits/rejected": 0.1936492919921875, "logps/chosen": -0.49932861328125, "logps/rejected": -0.5087890625, "loss": 9.27, "nll_loss": 0.5037841796875, "rewards/accuracies": 0.875, "rewards/chosen": -0.0499420166015625, "rewards/margins": 0.00095367431640625, "rewards/rejected": -0.05089569091796875, "step": 174 }, { "epoch": 0.0944733112895607, "grad_norm": 0.20078956628370362, "learning_rate": 1.9607063758554226e-05, "log_odds_chosen": 0.04107666015625, "log_odds_ratio": -0.67431640625, "logits/chosen": 0.13339996337890625, "logits/rejected": 0.19635772705078125, "logps/chosen": -0.50927734375, "logps/rejected": -0.5244140625, "loss": 10.1245, "nll_loss": 0.520263671875, "rewards/accuracies": 0.5625, "rewards/chosen": -0.05089569091796875, "rewards/margins": 0.00159454345703125, "rewards/rejected": -0.052490234375, "step": 175 }, { "epoch": 0.09501315878264391, "grad_norm": 0.2056412603429614, "learning_rate": 1.9602318379358687e-05, "log_odds_chosen": 0.033447265625, "log_odds_ratio": -0.67822265625, "logits/chosen": 0.3440207242965698, "logits/rejected": 0.498260498046875, "logps/chosen": -0.5517578125, "logps/rejected": -0.5543212890625, "loss": 9.7739, "nll_loss": 0.55694580078125, "rewards/accuracies": 0.5625, "rewards/chosen": -0.05521392822265625, "rewards/margins": 0.0002288818359375, "rewards/rejected": -0.05544281005859375, "step": 176 }, { "epoch": 0.0955530062757271, "grad_norm": 0.19775013408695913, "learning_rate": 1.9597545098822507e-05, "log_odds_chosen": 0.0447998046875, "log_odds_ratio": -0.671875, "logits/chosen": 0.310089111328125, "logits/rejected": 0.3926849365234375, "logps/chosen": -0.494140625, "logps/rejected": -0.5084228515625, "loss": 9.4702, "nll_loss": 0.497314453125, "rewards/accuracies": 0.75, "rewards/chosen": -0.0493927001953125, "rewards/margins": 0.00146484375, "rewards/rejected": -0.0508575439453125, "step": 177 }, { "epoch": 0.09609285376881031, "grad_norm": 0.20473156368210524, "learning_rate": 1.9592743930815345e-05, "log_odds_chosen": 0.00390625, "log_odds_ratio": -0.693359375, "logits/chosen": 0.455291748046875, "logits/rejected": 0.589111328125, "logps/chosen": -0.594970703125, "logps/rejected": -0.587890625, "loss": 10.0229, "nll_loss": 0.60406494140625, "rewards/accuracies": 0.5625, "rewards/chosen": -0.059478759765625, "rewards/margins": -0.0007171630859375, "rewards/rejected": -0.0587615966796875, "step": 178 }, { "epoch": 0.09663270126189351, "grad_norm": 0.21192716386649302, "learning_rate": 1.95879148892879e-05, "log_odds_chosen": -0.0428466796875, "log_odds_ratio": -0.71826171875, "logits/chosen": 0.19705963134765625, "logits/rejected": 0.353424072265625, "logps/chosen": -0.6649169921875, "logps/rejected": -0.6220703125, "loss": 10.25, "nll_loss": 0.66845703125, "rewards/accuracies": 0.5, "rewards/chosen": -0.06642913818359375, "rewards/margins": -0.004241943359375, "rewards/rejected": -0.06218719482421875, "step": 179 }, { "epoch": 0.09717254875497672, "grad_norm": 0.19839514417700263, "learning_rate": 1.9583057988271864e-05, "log_odds_chosen": 0.0738525390625, "log_odds_ratio": -0.65966796875, "logits/chosen": 0.01904296875, "logits/rejected": 0.05583953857421875, "logps/chosen": -0.4371337890625, "logps/rejected": -0.457763671875, "loss": 9.0811, "nll_loss": 0.451904296875, "rewards/accuracies": 0.625, "rewards/chosen": -0.043701171875, "rewards/margins": 0.002044677734375, "rewards/rejected": -0.045745849609375, "step": 180 }, { "epoch": 0.09771239624805993, "grad_norm": 0.21535921293786459, "learning_rate": 1.957817324187987e-05, "log_odds_chosen": 0.079345703125, "log_odds_ratio": -0.65576171875, "logits/chosen": 0.105682373046875, "logits/rejected": 0.21551513671875, "logps/chosen": -0.50927734375, "logps/rejected": -0.528564453125, "loss": 9.2432, "nll_loss": 0.5218505859375, "rewards/accuracies": 0.8125, "rewards/chosen": -0.0509033203125, "rewards/margins": 0.00196075439453125, "rewards/rejected": -0.05286407470703125, "step": 181 }, { "epoch": 0.09825224374114312, "grad_norm": 0.17903180762055698, "learning_rate": 1.9573260664305472e-05, "log_odds_chosen": 0.076416015625, "log_odds_ratio": -0.658203125, "logits/chosen": 0.317108154296875, "logits/rejected": 0.384613037109375, "logps/chosen": -0.5047607421875, "logps/rejected": -0.5220947265625, "loss": 8.6655, "nll_loss": 0.52496337890625, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0504608154296875, "rewards/margins": 0.001739501953125, "rewards/rejected": -0.0522003173828125, "step": 182 }, { "epoch": 0.09879209123422633, "grad_norm": 0.1929559163340445, "learning_rate": 1.9568320269823082e-05, "log_odds_chosen": 0.0244140625, "log_odds_ratio": -0.68359375, "logits/chosen": 0.30927276611328125, "logits/rejected": 0.3607177734375, "logps/chosen": -0.5040283203125, "logps/rejected": -0.5093994140625, "loss": 9.2402, "nll_loss": 0.50634765625, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0503692626953125, "rewards/margins": 0.00052642822265625, "rewards/rejected": -0.05089569091796875, "step": 183 }, { "epoch": 0.09933193872730954, "grad_norm": 0.1729077026588054, "learning_rate": 1.956335207278795e-05, "log_odds_chosen": 0.0894775390625, "log_odds_ratio": -0.650634765625, "logits/chosen": 0.449951171875, "logits/rejected": 0.434906005859375, "logps/chosen": -0.501708984375, "logps/rejected": -0.5303955078125, "loss": 8.9302, "nll_loss": 0.5147705078125, "rewards/accuracies": 0.625, "rewards/chosen": -0.0501708984375, "rewards/margins": 0.0028839111328125, "rewards/rejected": -0.0530548095703125, "step": 184 }, { "epoch": 0.09987178622039274, "grad_norm": 0.21994836111264132, "learning_rate": 1.9558356087636097e-05, "log_odds_chosen": 0.012939453125, "log_odds_ratio": -0.68896484375, "logits/chosen": 0.4226226806640625, "logits/rejected": 0.63580322265625, "logps/chosen": -0.6097412109375, "logps/rejected": -0.609375, "loss": 9.5859, "nll_loss": 0.6192626953125, "rewards/accuracies": 0.4375, "rewards/chosen": -0.0609893798828125, "rewards/margins": 1.52587890625e-05, "rewards/rejected": -0.061004638671875, "step": 185 }, { "epoch": 0.10041163371347595, "grad_norm": 0.20476747683644925, "learning_rate": 1.955333232888431e-05, "log_odds_chosen": 0.1173095703125, "log_odds_ratio": -0.638916015625, "logits/chosen": 0.119537353515625, "logits/rejected": 0.14876556396484375, "logps/chosen": -0.4739990234375, "logps/rejected": -0.521484375, "loss": 8.6411, "nll_loss": 0.4744873046875, "rewards/accuracies": 0.75, "rewards/chosen": -0.0474090576171875, "rewards/margins": 0.004730224609375, "rewards/rejected": -0.0521392822265625, "step": 186 }, { "epoch": 0.10095148120655914, "grad_norm": 0.19758968571483349, "learning_rate": 1.954828081113005e-05, "log_odds_chosen": 0.0235595703125, "log_odds_ratio": -0.68408203125, "logits/chosen": 0.36305999755859375, "logits/rejected": 0.46421051025390625, "logps/chosen": -0.5706787109375, "logps/rejected": -0.58184814453125, "loss": 9.1641, "nll_loss": 0.5782470703125, "rewards/accuracies": 0.5, "rewards/chosen": -0.0570526123046875, "rewards/margins": 0.00115966796875, "rewards/rejected": -0.0582122802734375, "step": 187 }, { "epoch": 0.10149132869964235, "grad_norm": 0.19464584316224054, "learning_rate": 1.9543201549051458e-05, "log_odds_chosen": 0.01025390625, "log_odds_ratio": -0.6953125, "logits/chosen": 0.505096435546875, "logits/rejected": 0.62939453125, "logps/chosen": -0.5382080078125, "logps/rejected": -0.5262451171875, "loss": 9.9702, "nll_loss": 0.57275390625, "rewards/accuracies": 0.3125, "rewards/chosen": -0.053863525390625, "rewards/margins": -0.001251220703125, "rewards/rejected": -0.0526123046875, "step": 188 }, { "epoch": 0.10203117619272556, "grad_norm": 0.21153885680456694, "learning_rate": 1.953809455740729e-05, "log_odds_chosen": 0.0872802734375, "log_odds_ratio": -0.6568603515625, "logits/chosen": 0.124176025390625, "logits/rejected": 0.13006591796875, "logps/chosen": -0.5316162109375, "logps/rejected": -0.55908203125, "loss": 8.9614, "nll_loss": 0.5487060546875, "rewards/accuracies": 0.625, "rewards/chosen": -0.05316162109375, "rewards/margins": 0.00274658203125, "rewards/rejected": -0.055908203125, "step": 189 }, { "epoch": 0.10257102368580875, "grad_norm": 0.1835183386332783, "learning_rate": 1.953295985103686e-05, "log_odds_chosen": 0.020751953125, "log_odds_ratio": -0.6865234375, "logits/chosen": 0.4206390380859375, "logits/rejected": 0.46843719482421875, "logps/chosen": -0.53173828125, "logps/rejected": -0.537841796875, "loss": 9.2549, "nll_loss": 0.542236328125, "rewards/accuracies": 0.625, "rewards/chosen": -0.053192138671875, "rewards/margins": 0.0005950927734375, "rewards/rejected": -0.0537872314453125, "step": 190 }, { "epoch": 0.10311087117889196, "grad_norm": 0.19282917156785645, "learning_rate": 1.952779744486003e-05, "log_odds_chosen": 0.0589599609375, "log_odds_ratio": -0.665283203125, "logits/chosen": 0.05896759033203125, "logits/rejected": 0.21804523468017578, "logps/chosen": -0.4869384765625, "logps/rejected": -0.505615234375, "loss": 8.3242, "nll_loss": 0.497314453125, "rewards/accuracies": 0.625, "rewards/chosen": -0.0486907958984375, "rewards/margins": 0.0019073486328125, "rewards/rejected": -0.05059814453125, "step": 191 }, { "epoch": 0.10365071867197517, "grad_norm": 0.19898932305604908, "learning_rate": 1.952260735387714e-05, "log_odds_chosen": 0.04034423828125, "log_odds_ratio": -0.67822265625, "logits/chosen": 0.15069580078125, "logits/rejected": 0.254180908203125, "logps/chosen": -0.5233154296875, "logps/rejected": -0.53173828125, "loss": 9.3999, "nll_loss": 0.52911376953125, "rewards/accuracies": 0.5625, "rewards/chosen": -0.05233001708984375, "rewards/margins": 0.00086212158203125, "rewards/rejected": -0.053192138671875, "step": 192 }, { "epoch": 0.10419056616505837, "grad_norm": 0.20042238864945236, "learning_rate": 1.9517389593168967e-05, "log_odds_chosen": 0.007080078125, "log_odds_ratio": -0.69384765625, "logits/chosen": 0.255035400390625, "logits/rejected": 0.436370849609375, "logps/chosen": -0.6463623046875, "logps/rejected": -0.6312255859375, "loss": 9.6167, "nll_loss": 0.664794921875, "rewards/accuracies": 0.6875, "rewards/chosen": -0.06464385986328125, "rewards/margins": -0.00147247314453125, "rewards/rejected": -0.06317138671875, "step": 193 }, { "epoch": 0.10473041365814158, "grad_norm": 0.19688146143068178, "learning_rate": 1.9512144177896705e-05, "log_odds_chosen": 0.017578125, "log_odds_ratio": -0.6865234375, "logits/chosen": 0.32861328125, "logits/rejected": 0.4210205078125, "logps/chosen": -0.4930419921875, "logps/rejected": -0.4974365234375, "loss": 9.1204, "nll_loss": 0.512939453125, "rewards/accuracies": 0.625, "rewards/chosen": -0.0493011474609375, "rewards/margins": 0.000457763671875, "rewards/rejected": -0.0497589111328125, "step": 194 }, { "epoch": 0.10527026115122477, "grad_norm": 0.201634372269304, "learning_rate": 1.950687112330189e-05, "log_odds_chosen": 0.0589599609375, "log_odds_ratio": -0.668212890625, "logits/chosen": 0.28955078125, "logits/rejected": 0.410247802734375, "logps/chosen": -0.4974365234375, "logps/rejected": -0.509521484375, "loss": 9.2622, "nll_loss": 0.510009765625, "rewards/accuracies": 0.5, "rewards/chosen": -0.04978179931640625, "rewards/margins": 0.00118255615234375, "rewards/rejected": -0.05096435546875, "step": 195 }, { "epoch": 0.10581010864430798, "grad_norm": 0.18089728962101104, "learning_rate": 1.9501570444706376e-05, "log_odds_chosen": 0.080322265625, "log_odds_ratio": -0.656494140625, "logits/chosen": 0.0593109130859375, "logits/rejected": 0.0833892822265625, "logps/chosen": -0.45098876953125, "logps/rejected": -0.48114013671875, "loss": 8.5918, "nll_loss": 0.458740234375, "rewards/accuracies": 0.875, "rewards/chosen": -0.0450897216796875, "rewards/margins": 0.0030364990234375, "rewards/rejected": -0.048126220703125, "step": 196 }, { "epoch": 0.10634995613739119, "grad_norm": 0.20593689486255515, "learning_rate": 1.949624215751227e-05, "log_odds_chosen": 0.1019287109375, "log_odds_ratio": -0.64501953125, "logits/chosen": 0.087371826171875, "logits/rejected": 0.1669158935546875, "logps/chosen": -0.42138671875, "logps/rejected": -0.4530029296875, "loss": 9.3413, "nll_loss": 0.42059326171875, "rewards/accuracies": 0.75, "rewards/chosen": -0.04217529296875, "rewards/margins": 0.00312042236328125, "rewards/rejected": -0.04529571533203125, "step": 197 }, { "epoch": 0.10688980363047439, "grad_norm": 0.222611882999965, "learning_rate": 1.949088627720193e-05, "log_odds_chosen": 0.0185546875, "log_odds_ratio": -0.6875, "logits/chosen": 0.1685791015625, "logits/rejected": 0.28037261962890625, "logps/chosen": -0.51513671875, "logps/rejected": -0.5045166015625, "loss": 9.0293, "nll_loss": 0.5228271484375, "rewards/accuracies": 0.5625, "rewards/chosen": -0.05147552490234375, "rewards/margins": -0.00101470947265625, "rewards/rejected": -0.0504608154296875, "step": 198 }, { "epoch": 0.1074296511235576, "grad_norm": 0.2098530257291399, "learning_rate": 1.9485502819337857e-05, "log_odds_chosen": -0.009521484375, "log_odds_ratio": -0.701171875, "logits/chosen": 0.3041839599609375, "logits/rejected": 0.4858741760253906, "logps/chosen": -0.6016845703125, "logps/rejected": -0.5816650390625, "loss": 9.7817, "nll_loss": 0.62353515625, "rewards/accuracies": 0.5, "rewards/chosen": -0.0601654052734375, "rewards/margins": -0.001983642578125, "rewards/rejected": -0.0581817626953125, "step": 199 }, { "epoch": 0.1079694986166408, "grad_norm": 0.1914520728412746, "learning_rate": 1.9480091799562706e-05, "log_odds_chosen": 0.0390625, "log_odds_ratio": -0.6767578125, "logits/chosen": 0.4198760986328125, "logits/rejected": 0.49407958984375, "logps/chosen": -0.505126953125, "logps/rejected": -0.515380859375, "loss": 9.6499, "nll_loss": 0.515625, "rewards/accuracies": 0.5625, "rewards/chosen": -0.05055999755859375, "rewards/margins": 0.00095367431640625, "rewards/rejected": -0.051513671875, "step": 200 }, { "epoch": 0.108509346109724, "grad_norm": 0.19265146727662358, "learning_rate": 1.947465323359921e-05, "log_odds_chosen": 0.0457763671875, "log_odds_ratio": -0.673095703125, "logits/chosen": 0.23781585693359375, "logits/rejected": 0.3016948699951172, "logps/chosen": -0.46630859375, "logps/rejected": -0.4810791015625, "loss": 9.4307, "nll_loss": 0.47607421875, "rewards/accuracies": 0.5625, "rewards/chosen": -0.04666900634765625, "rewards/margins": 0.00144195556640625, "rewards/rejected": -0.0481109619140625, "step": 201 }, { "epoch": 0.10904919360280721, "grad_norm": 0.2184979120384756, "learning_rate": 1.946918713725015e-05, "log_odds_chosen": 0.00482177734375, "log_odds_ratio": -0.693115234375, "logits/chosen": 0.270111083984375, "logits/rejected": 0.3655853271484375, "logps/chosen": -0.5904541015625, "logps/rejected": -0.58349609375, "loss": 10.001, "nll_loss": 0.6029052734375, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0590667724609375, "rewards/margins": -0.00067901611328125, "rewards/rejected": -0.05838775634765625, "step": 202 }, { "epoch": 0.1095890410958904, "grad_norm": 0.2122751071982319, "learning_rate": 1.9463693526398287e-05, "log_odds_chosen": 0.01263427734375, "log_odds_ratio": -0.690185546875, "logits/chosen": 0.261993408203125, "logits/rejected": 0.3926506042480469, "logps/chosen": -0.56207275390625, "logps/rejected": -0.54931640625, "loss": 9.9087, "nll_loss": 0.567138671875, "rewards/accuracies": 0.625, "rewards/chosen": -0.0562591552734375, "rewards/margins": -0.001312255859375, "rewards/rejected": -0.0549468994140625, "step": 203 }, { "epoch": 0.11012888858897361, "grad_norm": 0.21800582540774488, "learning_rate": 1.9458172417006347e-05, "log_odds_chosen": 0.052490234375, "log_odds_ratio": -0.670166015625, "logits/chosen": 0.286376953125, "logits/rejected": 0.384063720703125, "logps/chosen": -0.58056640625, "logps/rejected": -0.588623046875, "loss": 9.6523, "nll_loss": 0.5819091796875, "rewards/accuracies": 0.5625, "rewards/chosen": -0.05806732177734375, "rewards/margins": 0.0008087158203125, "rewards/rejected": -0.05887603759765625, "step": 204 }, { "epoch": 0.11066873608205682, "grad_norm": 0.20059649889693643, "learning_rate": 1.945262382511695e-05, "log_odds_chosen": 0.1109619140625, "log_odds_ratio": -0.6436767578125, "logits/chosen": 0.091094970703125, "logits/rejected": 0.2401123046875, "logps/chosen": -0.4324951171875, "logps/rejected": -0.4588623046875, "loss": 9.2952, "nll_loss": 0.44287109375, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0432891845703125, "rewards/margins": 0.002593994140625, "rewards/rejected": -0.0458831787109375, "step": 205 }, { "epoch": 0.11120858357514002, "grad_norm": 0.17879795628092518, "learning_rate": 1.944704776685257e-05, "log_odds_chosen": 0.1253662109375, "log_odds_ratio": -0.6353759765625, "logits/chosen": 0.10058975219726562, "logits/rejected": 0.16222381591796875, "logps/chosen": -0.3970947265625, "logps/rejected": -0.435302734375, "loss": 8.6763, "nll_loss": 0.4013671875, "rewards/accuracies": 0.6875, "rewards/chosen": -0.039703369140625, "rewards/margins": 0.00383758544921875, "rewards/rejected": -0.04354095458984375, "step": 206 }, { "epoch": 0.11174843106822323, "grad_norm": 0.20718406993579044, "learning_rate": 1.9441444258415492e-05, "log_odds_chosen": 0.072021484375, "log_odds_ratio": -0.6612548828125, "logits/chosen": 0.36163330078125, "logits/rejected": 0.4254908561706543, "logps/chosen": -0.52813720703125, "logps/rejected": -0.5557861328125, "loss": 9.2024, "nll_loss": 0.54150390625, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0528411865234375, "rewards/margins": 0.0027618408203125, "rewards/rejected": -0.05560302734375, "step": 207 }, { "epoch": 0.11228827856130644, "grad_norm": 0.2471521069061002, "learning_rate": 1.9435813316087766e-05, "log_odds_chosen": 0.078857421875, "log_odds_ratio": -0.657470703125, "logits/chosen": 0.264312744140625, "logits/rejected": 0.396820068359375, "logps/chosen": -0.6109619140625, "logps/rejected": -0.6260986328125, "loss": 11.248, "nll_loss": 0.618408203125, "rewards/accuracies": 0.75, "rewards/chosen": -0.06111907958984375, "rewards/margins": 0.001434326171875, "rewards/rejected": -0.06255340576171875, "step": 208 }, { "epoch": 0.11282812605438963, "grad_norm": 0.19385059368463628, "learning_rate": 1.9430154956231147e-05, "log_odds_chosen": 0.1368408203125, "log_odds_ratio": -0.62841796875, "logits/chosen": 0.38662147521972656, "logits/rejected": 0.4166107177734375, "logps/chosen": -0.4373779296875, "logps/rejected": -0.481201171875, "loss": 9.0093, "nll_loss": 0.4420166015625, "rewards/accuracies": 0.875, "rewards/chosen": -0.0437164306640625, "rewards/margins": 0.0044097900390625, "rewards/rejected": -0.048126220703125, "step": 209 }, { "epoch": 0.11336797354747284, "grad_norm": 0.18973932469225885, "learning_rate": 1.9424469195287073e-05, "log_odds_chosen": 0.1614990234375, "log_odds_ratio": -0.62109375, "logits/chosen": 0.18802642822265625, "logits/rejected": 0.280975341796875, "logps/chosen": -0.482177734375, "logps/rejected": -0.5283203125, "loss": 8.8948, "nll_loss": 0.4879150390625, "rewards/accuracies": 0.75, "rewards/chosen": -0.04824066162109375, "rewards/margins": 0.00458526611328125, "rewards/rejected": -0.052825927734375, "step": 210 }, { "epoch": 0.11390782104055604, "grad_norm": 0.20231470732276174, "learning_rate": 1.9418756049776583e-05, "log_odds_chosen": 0.094970703125, "log_odds_ratio": -0.650390625, "logits/chosen": 0.2406005859375, "logits/rejected": 0.309478759765625, "logps/chosen": -0.4332275390625, "logps/rejected": -0.4659423828125, "loss": 9.3057, "nll_loss": 0.44268798828125, "rewards/accuracies": 0.75, "rewards/chosen": -0.04332733154296875, "rewards/margins": 0.00325775146484375, "rewards/rejected": -0.0465850830078125, "step": 211 }, { "epoch": 0.11444766853363925, "grad_norm": 0.21423228240230233, "learning_rate": 1.9413015536300296e-05, "log_odds_chosen": 0.11480712890625, "log_odds_ratio": -0.643310546875, "logits/chosen": 0.2411956787109375, "logits/rejected": 0.3855414390563965, "logps/chosen": -0.6658935546875, "logps/rejected": -0.6878662109375, "loss": 9.7832, "nll_loss": 0.6722412109375, "rewards/accuracies": 0.625, "rewards/chosen": -0.06661224365234375, "rewards/margins": 0.00212860107421875, "rewards/rejected": -0.0687408447265625, "step": 212 }, { "epoch": 0.11498751602672246, "grad_norm": 0.2265848374735817, "learning_rate": 1.9407247671538357e-05, "log_odds_chosen": 0.08935546875, "log_odds_ratio": -0.652099609375, "logits/chosen": 0.3906745910644531, "logits/rejected": 0.48752593994140625, "logps/chosen": -0.5052490234375, "logps/rejected": -0.53515625, "loss": 9.6543, "nll_loss": 0.523681640625, "rewards/accuracies": 0.75, "rewards/chosen": -0.0505218505859375, "rewards/margins": 0.003021240234375, "rewards/rejected": -0.0535430908203125, "step": 213 }, { "epoch": 0.11552736351980565, "grad_norm": 0.19883519337695854, "learning_rate": 1.940145247225037e-05, "log_odds_chosen": 0.2293701171875, "log_odds_ratio": -0.5892333984375, "logits/chosen": 0.1963653564453125, "logits/rejected": 0.21417236328125, "logps/chosen": -0.417236328125, "logps/rejected": -0.4945068359375, "loss": 9.3286, "nll_loss": 0.4183349609375, "rewards/accuracies": 0.9375, "rewards/chosen": -0.041717529296875, "rewards/margins": 0.0077362060546875, "rewards/rejected": -0.0494537353515625, "step": 214 }, { "epoch": 0.11606721101288886, "grad_norm": 0.19434625326149654, "learning_rate": 1.9395629955275393e-05, "log_odds_chosen": 0.029052734375, "log_odds_ratio": -0.68505859375, "logits/chosen": 0.44640350341796875, "logits/rejected": 0.54156494140625, "logps/chosen": -0.5830078125, "logps/rejected": -0.593505859375, "loss": 9.7422, "nll_loss": 0.590576171875, "rewards/accuracies": 0.8125, "rewards/chosen": -0.0582733154296875, "rewards/margins": 0.0010528564453125, "rewards/rejected": -0.059326171875, "step": 215 }, { "epoch": 0.11660705850597207, "grad_norm": 0.19472793979463496, "learning_rate": 1.938978013753183e-05, "log_odds_chosen": 0.1917724609375, "log_odds_ratio": -0.6060791015625, "logits/chosen": 0.36004638671875, "logits/rejected": 0.384521484375, "logps/chosen": -0.5111083984375, "logps/rejected": -0.580322265625, "loss": 9.2544, "nll_loss": 0.5169677734375, "rewards/accuracies": 0.8125, "rewards/chosen": -0.051116943359375, "rewards/margins": 0.00689697265625, "rewards/rejected": -0.058013916015625, "step": 216 }, { "epoch": 0.11714690599905526, "grad_norm": 0.22282923573051291, "learning_rate": 1.938390303601743e-05, "log_odds_chosen": 0.1663818359375, "log_odds_ratio": -0.6241455078125, "logits/chosen": 0.2852783203125, "logits/rejected": 0.363433837890625, "logps/chosen": -0.5888671875, "logps/rejected": -0.634765625, "loss": 9.6494, "nll_loss": 0.61181640625, "rewards/accuracies": 0.75, "rewards/chosen": -0.05889892578125, "rewards/margins": 0.004486083984375, "rewards/rejected": -0.063385009765625, "step": 217 }, { "epoch": 0.11768675349213847, "grad_norm": 0.18526487254807897, "learning_rate": 1.9377998667809213e-05, "log_odds_chosen": 0.3486328125, "log_odds_ratio": -0.5423583984375, "logits/chosen": 0.009006500244140625, "logits/rejected": 0.034576416015625, "logps/chosen": -0.40087890625, "logps/rejected": -0.5250244140625, "loss": 9.1025, "nll_loss": 0.40869140625, "rewards/accuracies": 0.9375, "rewards/chosen": -0.04007720947265625, "rewards/margins": 0.01241302490234375, "rewards/rejected": -0.052490234375, "step": 218 }, { "epoch": 0.11822660098522167, "grad_norm": 0.23410882958092294, "learning_rate": 1.937206705006344e-05, "log_odds_chosen": 0.2308349609375, "log_odds_ratio": -0.6007080078125, "logits/chosen": 0.27667236328125, "logits/rejected": 0.40793609619140625, "logps/chosen": -0.68505859375, "logps/rejected": -0.7510986328125, "loss": 9.9966, "nll_loss": 0.68994140625, "rewards/accuracies": 0.8125, "rewards/chosen": -0.06856536865234375, "rewards/margins": 0.00653839111328125, "rewards/rejected": -0.075103759765625, "step": 219 }, { "epoch": 0.11876644847830488, "grad_norm": 0.2110171839578039, "learning_rate": 1.9366108200015527e-05, "log_odds_chosen": 0.1805419921875, "log_odds_ratio": -0.61572265625, "logits/chosen": 0.2264404296875, "logits/rejected": 0.34610748291015625, "logps/chosen": -0.481201171875, "logps/rejected": -0.5465087890625, "loss": 9.502, "nll_loss": 0.488525390625, "rewards/accuracies": 0.75, "rewards/chosen": -0.0481414794921875, "rewards/margins": 0.006500244140625, "rewards/rejected": -0.0546417236328125, "step": 220 }, { "epoch": 0.11930629597138809, "grad_norm": 0.19883474113446714, "learning_rate": 1.9360122134980045e-05, "log_odds_chosen": 0.2894287109375, "log_odds_ratio": -0.5654296875, "logits/chosen": 0.13944244384765625, "logits/rejected": 0.13708782196044922, "logps/chosen": -0.43463134765625, "logps/rejected": -0.534423828125, "loss": 8.8545, "nll_loss": 0.4429931640625, "rewards/accuracies": 0.9375, "rewards/chosen": -0.04344940185546875, "rewards/margins": 0.00997161865234375, "rewards/rejected": -0.0534210205078125, "step": 221 }, { "epoch": 0.11984614346447128, "grad_norm": 0.1982500079507664, "learning_rate": 1.935410887235062e-05, "log_odds_chosen": 0.1630859375, "log_odds_ratio": -0.6295166015625, "logits/chosen": 0.24676513671875, "logits/rejected": 0.42066192626953125, "logps/chosen": -0.687744140625, "logps/rejected": -0.7198486328125, "loss": 9.8982, "nll_loss": 0.685302734375, "rewards/accuracies": 0.625, "rewards/chosen": -0.068817138671875, "rewards/margins": 0.003143310546875, "rewards/rejected": -0.07196044921875, "step": 222 }, { "epoch": 0.12038599095755449, "grad_norm": 0.1866254484047601, "learning_rate": 1.934806842959993e-05, "log_odds_chosen": 0.2762451171875, "log_odds_ratio": -0.5738525390625, "logits/chosen": 0.30126953125, "logits/rejected": 0.3137931823730469, "logps/chosen": -0.458740234375, "logps/rejected": -0.559326171875, "loss": 8.7986, "nll_loss": 0.4654541015625, "rewards/accuracies": 0.875, "rewards/chosen": -0.04586029052734375, "rewards/margins": 0.010101318359375, "rewards/rejected": -0.0559539794921875, "step": 223 }, { "epoch": 0.1209258384506377, "grad_norm": 0.21802426255648324, "learning_rate": 1.9342000824279605e-05, "log_odds_chosen": 0.267822265625, "log_odds_ratio": -0.588623046875, "logits/chosen": 0.1971893310546875, "logits/rejected": 0.219451904296875, "logps/chosen": -0.44793701171875, "logps/rejected": -0.544189453125, "loss": 9.166, "nll_loss": 0.46722412109375, "rewards/accuracies": 0.75, "rewards/chosen": -0.04477691650390625, "rewards/margins": 0.00965118408203125, "rewards/rejected": -0.0544281005859375, "step": 224 }, { "epoch": 0.1214656859437209, "grad_norm": 0.22108417566183744, "learning_rate": 1.9335906074020216e-05, "log_odds_chosen": 0.1781005859375, "log_odds_ratio": -0.6224365234375, "logits/chosen": 0.377166748046875, "logits/rejected": 0.39633941650390625, "logps/chosen": -0.4716796875, "logps/rejected": -0.545166015625, "loss": 9.9229, "nll_loss": 0.4754638671875, "rewards/accuracies": 0.8125, "rewards/chosen": -0.04718017578125, "rewards/margins": 0.00732421875, "rewards/rejected": -0.05450439453125, "step": 225 }, { "epoch": 0.1220055334368041, "grad_norm": 0.19265052675033395, "learning_rate": 1.9329784196531206e-05, "log_odds_chosen": 0.053466796875, "log_odds_ratio": -0.672607421875, "logits/chosen": 0.40045166015625, "logits/rejected": 0.41949462890625, "logps/chosen": -0.51953125, "logps/rejected": -0.5338134765625, "loss": 9.978, "nll_loss": 0.5238037109375, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0519256591796875, "rewards/margins": 0.00146484375, "rewards/rejected": -0.0533905029296875, "step": 226 }, { "epoch": 0.1225453809298873, "grad_norm": 0.1897705827382882, "learning_rate": 1.9323635209600842e-05, "log_odds_chosen": 0.1273193359375, "log_odds_ratio": -0.638427734375, "logits/chosen": 0.4327392578125, "logits/rejected": 0.555267333984375, "logps/chosen": -0.496337890625, "logps/rejected": -0.544189453125, "loss": 8.8435, "nll_loss": 0.5137939453125, "rewards/accuracies": 0.6875, "rewards/chosen": -0.04965972900390625, "rewards/margins": 0.00476837158203125, "rewards/rejected": -0.0544281005859375, "step": 227 }, { "epoch": 0.12308522842297051, "grad_norm": 0.19818814453741526, "learning_rate": 1.931745913109616e-05, "log_odds_chosen": 0.1435546875, "log_odds_ratio": -0.6263427734375, "logits/chosen": 0.2536163330078125, "logits/rejected": 0.2846965789794922, "logps/chosen": -0.5028076171875, "logps/rejected": -0.5521240234375, "loss": 9.4009, "nll_loss": 0.509521484375, "rewards/accuracies": 0.8125, "rewards/chosen": -0.0502471923828125, "rewards/margins": 0.004974365234375, "rewards/rejected": -0.0552215576171875, "step": 228 }, { "epoch": 0.12362507591605372, "grad_norm": 0.20267689348367035, "learning_rate": 1.9311255978962916e-05, "log_odds_chosen": 0.10009765625, "log_odds_ratio": -0.648193359375, "logits/chosen": 0.3308868408203125, "logits/rejected": 0.3924713134765625, "logps/chosen": -0.48992919921875, "logps/rejected": -0.5186767578125, "loss": 9.2065, "nll_loss": 0.518798828125, "rewards/accuracies": 0.6875, "rewards/chosen": -0.04897308349609375, "rewards/margins": 0.00286102294921875, "rewards/rejected": -0.0518341064453125, "step": 229 }, { "epoch": 0.12416492340913692, "grad_norm": 0.1826050527204876, "learning_rate": 1.9305025771225535e-05, "log_odds_chosen": 0.1197509765625, "log_odds_ratio": -0.637451171875, "logits/chosen": 0.4577789306640625, "logits/rejected": 0.5003814697265625, "logps/chosen": -0.4649658203125, "logps/rejected": -0.505126953125, "loss": 8.4897, "nll_loss": 0.47314453125, "rewards/accuracies": 0.75, "rewards/chosen": -0.0465087890625, "rewards/margins": 0.0040130615234375, "rewards/rejected": -0.0505218505859375, "step": 230 }, { "epoch": 0.12470477090222012, "grad_norm": 0.20310414223316278, "learning_rate": 1.929876852598705e-05, "log_odds_chosen": 0.072509765625, "log_odds_ratio": -0.6617431640625, "logits/chosen": 0.3402557373046875, "logits/rejected": 0.44307708740234375, "logps/chosen": -0.6239013671875, "logps/rejected": -0.63525390625, "loss": 9.4014, "nll_loss": 0.62255859375, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0623321533203125, "rewards/margins": 0.00115966796875, "rewards/rejected": -0.0634918212890625, "step": 231 }, { "epoch": 0.12524461839530332, "grad_norm": 0.19715655265606707, "learning_rate": 1.9292484261429077e-05, "log_odds_chosen": 0.0770263671875, "log_odds_ratio": -0.6578369140625, "logits/chosen": 0.36788177490234375, "logits/rejected": 0.482513427734375, "logps/chosen": -0.508544921875, "logps/rejected": -0.5301513671875, "loss": 9.5337, "nll_loss": 0.5133056640625, "rewards/accuracies": 0.6875, "rewards/chosen": -0.05084228515625, "rewards/margins": 0.0022125244140625, "rewards/rejected": -0.0530548095703125, "step": 232 }, { "epoch": 0.12578446588838654, "grad_norm": 0.19171262021182708, "learning_rate": 1.928617299581172e-05, "log_odds_chosen": 0.03515625, "log_odds_ratio": -0.677490234375, "logits/chosen": 0.4652862548828125, "logits/rejected": 0.5642852783203125, "logps/chosen": -0.4842529296875, "logps/rejected": -0.4937744140625, "loss": 9.6987, "nll_loss": 0.504638671875, "rewards/accuracies": 0.6875, "rewards/chosen": -0.04846954345703125, "rewards/margins": 0.00092315673828125, "rewards/rejected": -0.0493927001953125, "step": 233 }, { "epoch": 0.12632431338146974, "grad_norm": 0.1841887194448241, "learning_rate": 1.9279834747473543e-05, "log_odds_chosen": 0.1278076171875, "log_odds_ratio": -0.633544921875, "logits/chosen": 0.44876861572265625, "logits/rejected": 0.5040664672851562, "logps/chosen": -0.45574951171875, "logps/rejected": -0.4981689453125, "loss": 9.2598, "nll_loss": 0.4676513671875, "rewards/accuracies": 0.75, "rewards/chosen": -0.0455780029296875, "rewards/margins": 0.0042724609375, "rewards/rejected": -0.0498504638671875, "step": 234 }, { "epoch": 0.12686416087455293, "grad_norm": 0.22430023250802164, "learning_rate": 1.9273469534831527e-05, "log_odds_chosen": 0.1363525390625, "log_odds_ratio": -0.62890625, "logits/chosen": 0.417724609375, "logits/rejected": 0.569549560546875, "logps/chosen": -0.5107421875, "logps/rejected": -0.5618896484375, "loss": 9.48, "nll_loss": 0.5264892578125, "rewards/accuracies": 0.8125, "rewards/chosen": -0.05108642578125, "rewards/margins": 0.005126953125, "rewards/rejected": -0.05621337890625, "step": 235 }, { "epoch": 0.12740400836763613, "grad_norm": 0.18815347826217657, "learning_rate": 1.926707737638098e-05, "log_odds_chosen": 0.0748291015625, "log_odds_ratio": -0.661376953125, "logits/chosen": 0.13098907470703125, "logits/rejected": 0.19672012329101562, "logps/chosen": -0.5321044921875, "logps/rejected": -0.536376953125, "loss": 9.4429, "nll_loss": 0.548095703125, "rewards/accuracies": 0.8125, "rewards/chosen": -0.05321502685546875, "rewards/margins": 0.00041961669921875, "rewards/rejected": -0.0536346435546875, "step": 236 }, { "epoch": 0.12794385586071935, "grad_norm": 0.18621533916809715, "learning_rate": 1.9260658290695536e-05, "log_odds_chosen": 0.07958984375, "log_odds_ratio": -0.6580810546875, "logits/chosen": 0.428924560546875, "logits/rejected": 0.480865478515625, "logps/chosen": -0.55615234375, "logps/rejected": -0.5787353515625, "loss": 8.832, "nll_loss": 0.5584716796875, "rewards/accuracies": 0.5625, "rewards/chosen": -0.05565643310546875, "rewards/margins": 0.00218963623046875, "rewards/rejected": -0.0578460693359375, "step": 237 }, { "epoch": 0.12848370335380255, "grad_norm": 0.196611568760683, "learning_rate": 1.9254212296427043e-05, "log_odds_chosen": 0.0438232421875, "log_odds_ratio": -0.6748046875, "logits/chosen": 0.220550537109375, "logits/rejected": 0.3625030517578125, "logps/chosen": -0.47222900390625, "logps/rejected": -0.48724365234375, "loss": 8.5474, "nll_loss": 0.4837646484375, "rewards/accuracies": 0.75, "rewards/chosen": -0.047210693359375, "rewards/margins": 0.00151824951171875, "rewards/rejected": -0.04872894287109375, "step": 238 }, { "epoch": 0.12902355084688574, "grad_norm": 0.19290958226738897, "learning_rate": 1.9247739412305554e-05, "log_odds_chosen": 0.0916748046875, "log_odds_ratio": -0.6513671875, "logits/chosen": 0.34796142578125, "logits/rejected": 0.46024322509765625, "logps/chosen": -0.489990234375, "logps/rejected": -0.5169677734375, "loss": 9.5415, "nll_loss": 0.4974365234375, "rewards/accuracies": 0.6875, "rewards/chosen": -0.048980712890625, "rewards/margins": 0.0026702880859375, "rewards/rejected": -0.0516510009765625, "step": 239 }, { "epoch": 0.12956339833996897, "grad_norm": 0.21865093985294634, "learning_rate": 1.9241239657139248e-05, "log_odds_chosen": 0.0614013671875, "log_odds_ratio": -0.6650390625, "logits/chosen": 0.17996859550476074, "logits/rejected": 0.28858184814453125, "logps/chosen": -0.53857421875, "logps/rejected": -0.5596923828125, "loss": 10.3735, "nll_loss": 0.549560546875, "rewards/accuracies": 0.625, "rewards/chosen": -0.05389404296875, "rewards/margins": 0.0020751953125, "rewards/rejected": -0.05596923828125, "step": 240 }, { "epoch": 0.13010324583305216, "grad_norm": 0.19202076395245948, "learning_rate": 1.9234713049814387e-05, "log_odds_chosen": 0.0552978515625, "log_odds_ratio": -0.666748046875, "logits/chosen": 0.506683349609375, "logits/rejected": 0.570953369140625, "logps/chosen": -0.5107421875, "logps/rejected": -0.5272216796875, "loss": 9.835, "nll_loss": 0.5140380859375, "rewards/accuracies": 0.75, "rewards/chosen": -0.0511016845703125, "rewards/margins": 0.0016326904296875, "rewards/rejected": -0.052734375, "step": 241 }, { "epoch": 0.13064309332613536, "grad_norm": 0.21801295757950165, "learning_rate": 1.9228159609295252e-05, "log_odds_chosen": 0.107177734375, "log_odds_ratio": -0.642578125, "logits/chosen": 0.13921165466308594, "logits/rejected": 0.2738189697265625, "logps/chosen": -0.559326171875, "logps/rejected": -0.5850830078125, "loss": 9.3994, "nll_loss": 0.56591796875, "rewards/accuracies": 0.875, "rewards/chosen": -0.05596923828125, "rewards/margins": 0.00251007080078125, "rewards/rejected": -0.05847930908203125, "step": 242 }, { "epoch": 0.13118294081921858, "grad_norm": 0.20940372893678794, "learning_rate": 1.92215793546241e-05, "log_odds_chosen": 0.105224609375, "log_odds_ratio": -0.6435546875, "logits/chosen": 0.5679931640625, "logits/rejected": 0.688995361328125, "logps/chosen": -0.5037841796875, "logps/rejected": -0.5418701171875, "loss": 9.8667, "nll_loss": 0.515625, "rewards/accuracies": 0.8125, "rewards/chosen": -0.05037689208984375, "rewards/margins": 0.00379180908203125, "rewards/rejected": -0.054168701171875, "step": 243 }, { "epoch": 0.13172278831230178, "grad_norm": 0.21277540992731991, "learning_rate": 1.92149723049211e-05, "log_odds_chosen": 0.1121826171875, "log_odds_ratio": -0.642578125, "logits/chosen": 0.25202178955078125, "logits/rejected": 0.2824089825153351, "logps/chosen": -0.6290283203125, "logps/rejected": -0.665283203125, "loss": 9.3799, "nll_loss": 0.628173828125, "rewards/accuracies": 0.75, "rewards/chosen": -0.0628509521484375, "rewards/margins": 0.00363922119140625, "rewards/rejected": -0.06649017333984375, "step": 244 }, { "epoch": 0.13226263580538497, "grad_norm": 0.19410072135035342, "learning_rate": 1.9208338479384267e-05, "log_odds_chosen": 0.1085205078125, "log_odds_ratio": -0.64208984375, "logits/chosen": 0.53558349609375, "logits/rejected": 0.6219635009765625, "logps/chosen": -0.521484375, "logps/rejected": -0.5565185546875, "loss": 9.3779, "nll_loss": 0.5325927734375, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0521697998046875, "rewards/margins": 0.00347900390625, "rewards/rejected": -0.0556488037109375, "step": 245 }, { "epoch": 0.1328024832984682, "grad_norm": 0.19499824757598289, "learning_rate": 1.9201677897289438e-05, "log_odds_chosen": 0.09869384765625, "log_odds_ratio": -0.6478271484375, "logits/chosen": 0.3364715576171875, "logits/rejected": 0.45020294189453125, "logps/chosen": -0.48065185546875, "logps/rejected": -0.513427734375, "loss": 9.2183, "nll_loss": 0.49603271484375, "rewards/accuracies": 0.75, "rewards/chosen": -0.0480804443359375, "rewards/margins": 0.00327301025390625, "rewards/rejected": -0.05135345458984375, "step": 246 }, { "epoch": 0.1333423307915514, "grad_norm": 0.2011484566622038, "learning_rate": 1.9194990577990178e-05, "log_odds_chosen": 0.072021484375, "log_odds_ratio": -0.6605224609375, "logits/chosen": 0.4024505615234375, "logits/rejected": 0.5169219970703125, "logps/chosen": -0.4871826171875, "logps/rejected": -0.508056640625, "loss": 9.5977, "nll_loss": 0.49591064453125, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0487213134765625, "rewards/margins": 0.0020904541015625, "rewards/rejected": -0.050811767578125, "step": 247 }, { "epoch": 0.13388217828463458, "grad_norm": 0.2207165794561571, "learning_rate": 1.9188276540917756e-05, "log_odds_chosen": 0.0909423828125, "log_odds_ratio": -0.6522216796875, "logits/chosen": 0.380279541015625, "logits/rejected": 0.47576904296875, "logps/chosen": -0.5426025390625, "logps/rejected": -0.57470703125, "loss": 9.7529, "nll_loss": 0.5452880859375, "rewards/accuracies": 0.6875, "rewards/chosen": -0.05431365966796875, "rewards/margins": 0.0031280517578125, "rewards/rejected": -0.05744171142578125, "step": 248 }, { "epoch": 0.1344220257777178, "grad_norm": 0.2087096603101327, "learning_rate": 1.918153580558106e-05, "log_odds_chosen": 0.113037109375, "log_odds_ratio": -0.640869140625, "logits/chosen": 0.309356689453125, "logits/rejected": 0.39113616943359375, "logps/chosen": -0.4681396484375, "logps/rejected": -0.5064697265625, "loss": 9.248, "nll_loss": 0.48358154296875, "rewards/accuracies": 0.75, "rewards/chosen": -0.0467987060546875, "rewards/margins": 0.00386810302734375, "rewards/rejected": -0.05066680908203125, "step": 249 }, { "epoch": 0.134961873270801, "grad_norm": 0.21006079865392263, "learning_rate": 1.9174768391566574e-05, "log_odds_chosen": 0.0321044921875, "log_odds_ratio": -0.686279296875, "logits/chosen": 0.35722827911376953, "logits/rejected": 0.534942626953125, "logps/chosen": -0.69775390625, "logps/rejected": -0.681884765625, "loss": 10.2983, "nll_loss": 0.6942138671875, "rewards/accuracies": 0.625, "rewards/chosen": -0.0697784423828125, "rewards/margins": -0.0016021728515625, "rewards/rejected": -0.06817626953125, "step": 250 }, { "epoch": 0.1355017207638842, "grad_norm": 0.20542709972816173, "learning_rate": 1.9167974318538286e-05, "log_odds_chosen": -0.0257568359375, "log_odds_ratio": -0.71044921875, "logits/chosen": 0.220123291015625, "logits/rejected": 0.394866943359375, "logps/chosen": -0.5584716796875, "logps/rejected": -0.5482177734375, "loss": 9.2598, "nll_loss": 0.5584716796875, "rewards/accuracies": 0.5625, "rewards/chosen": -0.055877685546875, "rewards/margins": -0.0010528564453125, "rewards/rejected": -0.0548248291015625, "step": 251 }, { "epoch": 0.1360415682569674, "grad_norm": 0.20292549290900072, "learning_rate": 1.9161153606237652e-05, "log_odds_chosen": 0.0418701171875, "log_odds_ratio": -0.674560546875, "logits/chosen": 0.2025146484375, "logits/rejected": 0.3114757537841797, "logps/chosen": -0.514892578125, "logps/rejected": -0.5225830078125, "loss": 8.7014, "nll_loss": 0.5262451171875, "rewards/accuracies": 0.625, "rewards/chosen": -0.0514984130859375, "rewards/margins": 0.00079345703125, "rewards/rejected": -0.0522918701171875, "step": 252 }, { "epoch": 0.13658141575005062, "grad_norm": 0.21958692449370135, "learning_rate": 1.915430627448353e-05, "log_odds_chosen": 0.1439208984375, "log_odds_ratio": -0.6259765625, "logits/chosen": 0.09296178817749023, "logits/rejected": 0.14177703857421875, "logps/chosen": -0.4046630859375, "logps/rejected": -0.448974609375, "loss": 9.5562, "nll_loss": 0.4088134765625, "rewards/accuracies": 0.875, "rewards/chosen": -0.040435791015625, "rewards/margins": 0.00446319580078125, "rewards/rejected": -0.04489898681640625, "step": 253 }, { "epoch": 0.1371212632431338, "grad_norm": 0.1931744130112024, "learning_rate": 1.914743234317214e-05, "log_odds_chosen": 0.0579833984375, "log_odds_ratio": -0.66650390625, "logits/chosen": 0.593994140625, "logits/rejected": 0.7266921997070312, "logps/chosen": -0.5665283203125, "logps/rejected": -0.5911865234375, "loss": 9.0176, "nll_loss": 0.5784912109375, "rewards/accuracies": 0.6875, "rewards/chosen": -0.056640625, "rewards/margins": 0.0024566650390625, "rewards/rejected": -0.0590972900390625, "step": 254 }, { "epoch": 0.137661110736217, "grad_norm": 0.20449670318777624, "learning_rate": 1.9140531832276968e-05, "log_odds_chosen": 0.079833984375, "log_odds_ratio": -0.656494140625, "logits/chosen": 0.2391510009765625, "logits/rejected": 0.2467171549797058, "logps/chosen": -0.4442138671875, "logps/rejected": -0.4674072265625, "loss": 9.0713, "nll_loss": 0.4521484375, "rewards/accuracies": 0.625, "rewards/chosen": -0.0444183349609375, "rewards/margins": 0.00231170654296875, "rewards/rejected": -0.04673004150390625, "step": 255 }, { "epoch": 0.13820095822930023, "grad_norm": 0.2122859096561496, "learning_rate": 1.9133604761848755e-05, "log_odds_chosen": 0.1644287109375, "log_odds_ratio": -0.61767578125, "logits/chosen": 0.20409774780273438, "logits/rejected": 0.2704925537109375, "logps/chosen": -0.42626953125, "logps/rejected": -0.484375, "loss": 8.6616, "nll_loss": 0.42578125, "rewards/accuracies": 0.8125, "rewards/chosen": -0.0426177978515625, "rewards/margins": 0.005828857421875, "rewards/rejected": -0.0484466552734375, "step": 256 }, { "epoch": 0.13874080572238343, "grad_norm": 0.21422681927057044, "learning_rate": 1.9126651152015404e-05, "log_odds_chosen": 0.07666015625, "log_odds_ratio": -0.660400390625, "logits/chosen": 0.08684730529785156, "logits/rejected": 0.22406005859375, "logps/chosen": -0.56744384765625, "logps/rejected": -0.5733642578125, "loss": 9.6533, "nll_loss": 0.57171630859375, "rewards/accuracies": 0.6875, "rewards/chosen": -0.05672454833984375, "rewards/margins": 0.00066375732421875, "rewards/rejected": -0.0573883056640625, "step": 257 }, { "epoch": 0.13928065321546662, "grad_norm": 0.22983967704162603, "learning_rate": 1.9119671022981928e-05, "log_odds_chosen": 0.098876953125, "log_odds_ratio": -0.64892578125, "logits/chosen": 0.04778289794921875, "logits/rejected": 0.10675048828125, "logps/chosen": -0.42523193359375, "logps/rejected": -0.45654296875, "loss": 10.1528, "nll_loss": 0.4378662109375, "rewards/accuracies": 0.75, "rewards/chosen": -0.04253387451171875, "rewards/margins": 0.00312042236328125, "rewards/rejected": -0.045654296875, "step": 258 }, { "epoch": 0.13982050070854984, "grad_norm": 0.21979248847698585, "learning_rate": 1.9112664395030416e-05, "log_odds_chosen": 0.08306884765625, "log_odds_ratio": -0.654296875, "logits/chosen": 0.16497802734375, "logits/rejected": 0.26751708984375, "logps/chosen": -0.5216064453125, "logps/rejected": -0.5474853515625, "loss": 9.0469, "nll_loss": 0.5274658203125, "rewards/accuracies": 0.8125, "rewards/chosen": -0.0521240234375, "rewards/margins": 0.00262451171875, "rewards/rejected": -0.05474853515625, "step": 259 }, { "epoch": 0.14036034820163304, "grad_norm": 0.2062725517048069, "learning_rate": 1.9105631288519934e-05, "log_odds_chosen": 0.076416015625, "log_odds_ratio": -0.65771484375, "logits/chosen": 0.574676513671875, "logits/rejected": 0.7349395751953125, "logps/chosen": -0.553466796875, "logps/rejected": -0.5751953125, "loss": 9.1816, "nll_loss": 0.5648193359375, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0553436279296875, "rewards/margins": 0.00213623046875, "rewards/rejected": -0.0574798583984375, "step": 260 }, { "epoch": 0.14090019569471623, "grad_norm": 0.2286565306955373, "learning_rate": 1.9098571723886494e-05, "log_odds_chosen": 0.0078125, "log_odds_ratio": -0.693115234375, "logits/chosen": 0.30707550048828125, "logits/rejected": 0.40041518211364746, "logps/chosen": -0.6199951171875, "logps/rejected": -0.607177734375, "loss": 10.5654, "nll_loss": 0.61676025390625, "rewards/accuracies": 0.5, "rewards/chosen": -0.061981201171875, "rewards/margins": -0.0012359619140625, "rewards/rejected": -0.0607452392578125, "step": 261 }, { "epoch": 0.14144004318779946, "grad_norm": 0.2010287008957491, "learning_rate": 1.909148572164299e-05, "log_odds_chosen": 0.0340576171875, "log_odds_ratio": -0.68310546875, "logits/chosen": 0.4225311279296875, "logits/rejected": 0.5396881103515625, "logps/chosen": -0.5699462890625, "logps/rejected": -0.58160400390625, "loss": 9.2236, "nll_loss": 0.5811767578125, "rewards/accuracies": 0.6875, "rewards/chosen": -0.05698394775390625, "rewards/margins": 0.0012054443359375, "rewards/rejected": -0.05818939208984375, "step": 262 }, { "epoch": 0.14197989068088265, "grad_norm": 0.19681897344909982, "learning_rate": 1.9084373302379137e-05, "log_odds_chosen": 0.06536865234375, "log_odds_ratio": -0.6650390625, "logits/chosen": 0.03734588623046875, "logits/rejected": 0.175445556640625, "logps/chosen": -0.63671875, "logps/rejected": -0.64453125, "loss": 9.3594, "nll_loss": 0.6458740234375, "rewards/accuracies": 0.8125, "rewards/chosen": -0.06365966796875, "rewards/margins": 0.00083160400390625, "rewards/rejected": -0.06449127197265625, "step": 263 }, { "epoch": 0.14251973817396585, "grad_norm": 0.20003967603828002, "learning_rate": 1.90772344867614e-05, "log_odds_chosen": -0.01513671875, "log_odds_ratio": -0.7060546875, "logits/chosen": 0.3780517578125, "logits/rejected": 0.4907684326171875, "logps/chosen": -0.62841796875, "logps/rejected": -0.595703125, "loss": 10.1621, "nll_loss": 0.6319580078125, "rewards/accuracies": 0.5, "rewards/chosen": -0.062835693359375, "rewards/margins": -0.00323486328125, "rewards/rejected": -0.059600830078125, "step": 264 }, { "epoch": 0.14305958566704907, "grad_norm": 0.1962521298153105, "learning_rate": 1.907006929553295e-05, "log_odds_chosen": 0.048583984375, "log_odds_ratio": -0.671630859375, "logits/chosen": 0.2872772216796875, "logits/rejected": 0.44939231872558594, "logps/chosen": -0.55523681640625, "logps/rejected": -0.559814453125, "loss": 9.2617, "nll_loss": 0.56878662109375, "rewards/accuracies": 0.5, "rewards/chosen": -0.0555267333984375, "rewards/margins": 0.0004730224609375, "rewards/rejected": -0.055999755859375, "step": 265 }, { "epoch": 0.14359943316013227, "grad_norm": 0.19803311563717899, "learning_rate": 1.90628777495136e-05, "log_odds_chosen": 0.0377197265625, "log_odds_ratio": -0.677490234375, "logits/chosen": 0.3837890625, "logits/rejected": 0.5396728515625, "logps/chosen": -0.596923828125, "logps/rejected": -0.5975341796875, "loss": 10.1499, "nll_loss": 0.6058349609375, "rewards/accuracies": 0.625, "rewards/chosen": -0.0596771240234375, "rewards/margins": 9.1552734375e-05, "rewards/rejected": -0.0597686767578125, "step": 266 }, { "epoch": 0.14413928065321546, "grad_norm": 0.20191020985432653, "learning_rate": 1.9055659869599737e-05, "log_odds_chosen": 0.0455322265625, "log_odds_ratio": -0.6767578125, "logits/chosen": 0.109588623046875, "logits/rejected": 0.2075023651123047, "logps/chosen": -0.5450439453125, "logps/rejected": -0.541748046875, "loss": 9.9727, "nll_loss": 0.5489501953125, "rewards/accuracies": 0.75, "rewards/chosen": -0.05450439453125, "rewards/margins": -0.00029754638671875, "rewards/rejected": -0.05420684814453125, "step": 267 }, { "epoch": 0.14467912814629866, "grad_norm": 0.19235354678537955, "learning_rate": 1.9048415676764268e-05, "log_odds_chosen": 0.02783203125, "log_odds_ratio": -0.6796875, "logits/chosen": 0.608062744140625, "logits/rejected": 0.605682373046875, "logps/chosen": -0.7310791015625, "logps/rejected": -0.7449951171875, "loss": 9.9131, "nll_loss": 0.7421875, "rewards/accuracies": 0.4375, "rewards/chosen": -0.073150634765625, "rewards/margins": 0.0012969970703125, "rewards/rejected": -0.0744476318359375, "step": 268 }, { "epoch": 0.14521897563938188, "grad_norm": 0.20248232464891316, "learning_rate": 1.9041145192056555e-05, "log_odds_chosen": 0.0679931640625, "log_odds_ratio": -0.663330078125, "logits/chosen": 0.4304962158203125, "logits/rejected": 0.5106201171875, "logps/chosen": -0.48492431640625, "logps/rejected": -0.505615234375, "loss": 10.2417, "nll_loss": 0.49566650390625, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0484771728515625, "rewards/margins": 0.0020751953125, "rewards/rejected": -0.0505523681640625, "step": 269 }, { "epoch": 0.14575882313246508, "grad_norm": 0.18366431200857913, "learning_rate": 1.903384843660236e-05, "log_odds_chosen": 0.053466796875, "log_odds_ratio": -0.670654296875, "logits/chosen": 0.3508148193359375, "logits/rejected": 0.419921875, "logps/chosen": -0.48907470703125, "logps/rejected": -0.5025634765625, "loss": 9.0747, "nll_loss": 0.49932861328125, "rewards/accuracies": 0.625, "rewards/chosen": -0.04888916015625, "rewards/margins": 0.0013885498046875, "rewards/rejected": -0.0502777099609375, "step": 270 }, { "epoch": 0.14629867062554827, "grad_norm": 0.21541558452989742, "learning_rate": 1.9026525431603775e-05, "log_odds_chosen": 0.0196533203125, "log_odds_ratio": -0.6922607421875, "logits/chosen": 0.2230224609375, "logits/rejected": 0.35631561279296875, "logps/chosen": -0.67657470703125, "logps/rejected": -0.6571044921875, "loss": 10.7739, "nll_loss": 0.6800537109375, "rewards/accuracies": 0.625, "rewards/chosen": -0.06764984130859375, "rewards/margins": -0.00193023681640625, "rewards/rejected": -0.0657196044921875, "step": 271 }, { "epoch": 0.1468385181186315, "grad_norm": 0.18784573475677907, "learning_rate": 1.9019176198339168e-05, "log_odds_chosen": 0.08251953125, "log_odds_ratio": -0.656005859375, "logits/chosen": 0.2913036346435547, "logits/rejected": 0.3303985595703125, "logps/chosen": -0.4364013671875, "logps/rejected": -0.4605712890625, "loss": 8.4102, "nll_loss": 0.4439697265625, "rewards/accuracies": 0.625, "rewards/chosen": -0.0436248779296875, "rewards/margins": 0.002410888671875, "rewards/rejected": -0.0460357666015625, "step": 272 }, { "epoch": 0.1473783656117147, "grad_norm": 0.19656716570208513, "learning_rate": 1.9011800758163118e-05, "log_odds_chosen": 0.14208984375, "log_odds_ratio": -0.6275634765625, "logits/chosen": 0.198089599609375, "logits/rejected": 0.3092613220214844, "logps/chosen": -0.4620361328125, "logps/rejected": -0.5096435546875, "loss": 9.0366, "nll_loss": 0.46875, "rewards/accuracies": 0.75, "rewards/chosen": -0.04619598388671875, "rewards/margins": 0.00475311279296875, "rewards/rejected": -0.0509490966796875, "step": 273 }, { "epoch": 0.14791821310479789, "grad_norm": 0.2039399005077538, "learning_rate": 1.9004399132506352e-05, "log_odds_chosen": 0.0560302734375, "log_odds_ratio": -0.6729736328125, "logits/chosen": 0.206329345703125, "logits/rejected": 0.335418701171875, "logps/chosen": -0.6099853515625, "logps/rejected": -0.608642578125, "loss": 9.5137, "nll_loss": 0.607666015625, "rewards/accuracies": 0.625, "rewards/chosen": -0.06101226806640625, "rewards/margins": -0.00016021728515625, "rewards/rejected": -0.06085205078125, "step": 274 }, { "epoch": 0.1484580605978811, "grad_norm": 0.2085764212488349, "learning_rate": 1.8996971342875686e-05, "log_odds_chosen": 0.12060546875, "log_odds_ratio": -0.6446533203125, "logits/chosen": 0.159576416015625, "logits/rejected": 0.2688751220703125, "logps/chosen": -0.51507568359375, "logps/rejected": -0.539794921875, "loss": 8.7061, "nll_loss": 0.5242919921875, "rewards/accuracies": 0.625, "rewards/chosen": -0.0514984130859375, "rewards/margins": 0.0024871826171875, "rewards/rejected": -0.053985595703125, "step": 275 }, { "epoch": 0.1489979080909643, "grad_norm": 0.20910662873169356, "learning_rate": 1.8989517410853956e-05, "log_odds_chosen": 0.1578369140625, "log_odds_ratio": -0.6207275390625, "logits/chosen": 0.21675872802734375, "logits/rejected": 0.3427734375, "logps/chosen": -0.50128173828125, "logps/rejected": -0.5511474609375, "loss": 9.1401, "nll_loss": 0.51251220703125, "rewards/accuracies": 0.8125, "rewards/chosen": -0.050140380859375, "rewards/margins": 0.00498199462890625, "rewards/rejected": -0.05512237548828125, "step": 276 }, { "epoch": 0.1495377555840475, "grad_norm": 0.21645701989540467, "learning_rate": 1.8982037358099963e-05, "log_odds_chosen": 0.177001953125, "log_odds_ratio": -0.61181640625, "logits/chosen": 0.2740478515625, "logits/rejected": 0.430084228515625, "logps/chosen": -0.48876953125, "logps/rejected": -0.558349609375, "loss": 8.2734, "nll_loss": 0.5064697265625, "rewards/accuracies": 0.8125, "rewards/chosen": -0.0488739013671875, "rewards/margins": 0.0069427490234375, "rewards/rejected": -0.055816650390625, "step": 277 }, { "epoch": 0.15007760307713072, "grad_norm": 0.23168311552317675, "learning_rate": 1.8974531206348404e-05, "log_odds_chosen": 0.1226806640625, "log_odds_ratio": -0.6414794921875, "logits/chosen": 0.176483154296875, "logits/rejected": 0.1978759765625, "logps/chosen": -0.48309326171875, "logps/rejected": -0.52294921875, "loss": 9.2622, "nll_loss": 0.4935302734375, "rewards/accuracies": 0.6875, "rewards/chosen": -0.048309326171875, "rewards/margins": 0.0040130615234375, "rewards/rejected": -0.0523223876953125, "step": 278 }, { "epoch": 0.15061745057021392, "grad_norm": 0.2329323528555533, "learning_rate": 1.896699897740982e-05, "log_odds_chosen": 0.15625, "log_odds_ratio": -0.6334228515625, "logits/chosen": 0.22711181640625, "logits/rejected": 0.42242431640625, "logps/chosen": -0.70086669921875, "logps/rejected": -0.7203369140625, "loss": 10.3516, "nll_loss": 0.7005615234375, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0700836181640625, "rewards/margins": 0.001953125, "rewards/rejected": -0.0720367431640625, "step": 279 }, { "epoch": 0.1511572980632971, "grad_norm": 0.20355237733667025, "learning_rate": 1.8959440693170513e-05, "log_odds_chosen": 0.197509765625, "log_odds_ratio": -0.611083984375, "logits/chosen": 0.35118722915649414, "logits/rejected": 0.44451141357421875, "logps/chosen": -0.6068115234375, "logps/rejected": -0.6741943359375, "loss": 9.5474, "nll_loss": 0.62255859375, "rewards/accuracies": 0.875, "rewards/chosen": -0.0606536865234375, "rewards/margins": 0.0068359375, "rewards/rejected": -0.0674896240234375, "step": 280 }, { "epoch": 0.15169714555638034, "grad_norm": 0.19746311509293735, "learning_rate": 1.89518563755925e-05, "log_odds_chosen": 0.2928466796875, "log_odds_ratio": -0.57421875, "logits/chosen": 0.06162261962890625, "logits/rejected": 0.06953811645507812, "logps/chosen": -0.4193115234375, "logps/rejected": -0.510986328125, "loss": 9.0132, "nll_loss": 0.42578125, "rewards/accuracies": 0.75, "rewards/chosen": -0.0419464111328125, "rewards/margins": 0.00916290283203125, "rewards/rejected": -0.0511016845703125, "step": 281 }, { "epoch": 0.15223699304946353, "grad_norm": 0.21186093336562734, "learning_rate": 1.894424604671344e-05, "log_odds_chosen": 0.3160400390625, "log_odds_ratio": -0.561767578125, "logits/chosen": 0.28411865234375, "logits/rejected": 0.2676734924316406, "logps/chosen": -0.4249267578125, "logps/rejected": -0.5450439453125, "loss": 9.7314, "nll_loss": 0.4266357421875, "rewards/accuracies": 0.75, "rewards/chosen": -0.04247283935546875, "rewards/margins": 0.0120697021484375, "rewards/rejected": -0.0545501708984375, "step": 282 }, { "epoch": 0.15277684054254673, "grad_norm": 0.220811931317713, "learning_rate": 1.8936609728646576e-05, "log_odds_chosen": 0.09326171875, "log_odds_ratio": -0.6771240234375, "logits/chosen": 0.386749267578125, "logits/rejected": 0.4469757080078125, "logps/chosen": -0.5479736328125, "logps/rejected": -0.584716796875, "loss": 8.7192, "nll_loss": 0.55029296875, "rewards/accuracies": 0.6875, "rewards/chosen": -0.05481719970703125, "rewards/margins": 0.00365447998046875, "rewards/rejected": -0.05846405029296875, "step": 283 }, { "epoch": 0.15331668803562992, "grad_norm": 0.19707079381381368, "learning_rate": 1.892894744358066e-05, "log_odds_chosen": 0.33270263671875, "log_odds_ratio": -0.55792236328125, "logits/chosen": 0.15219879150390625, "logits/rejected": 0.22357177734375, "logps/chosen": -0.61053466796875, "logps/rejected": -0.73876953125, "loss": 9.478, "nll_loss": 0.610107421875, "rewards/accuracies": 0.8125, "rewards/chosen": -0.0610504150390625, "rewards/margins": 0.0128021240234375, "rewards/rejected": -0.0738525390625, "step": 284 }, { "epoch": 0.15385653552871315, "grad_norm": 0.20834619137266264, "learning_rate": 1.8921259213779907e-05, "log_odds_chosen": 0.2093505859375, "log_odds_ratio": -0.60107421875, "logits/chosen": 0.2002105712890625, "logits/rejected": 0.3265361785888672, "logps/chosen": -0.4932861328125, "logps/rejected": -0.5604248046875, "loss": 9.9858, "nll_loss": 0.496337890625, "rewards/accuracies": 0.8125, "rewards/chosen": -0.04935455322265625, "rewards/margins": 0.00672149658203125, "rewards/rejected": -0.0560760498046875, "step": 285 }, { "epoch": 0.15439638302179634, "grad_norm": 0.1910268437524575, "learning_rate": 1.891354506158391e-05, "log_odds_chosen": 0.15771484375, "log_odds_ratio": -0.6201171875, "logits/chosen": 0.35884666442871094, "logits/rejected": 0.43255615234375, "logps/chosen": -0.475830078125, "logps/rejected": -0.530029296875, "loss": 9.3555, "nll_loss": 0.4814453125, "rewards/accuracies": 0.75, "rewards/chosen": -0.047576904296875, "rewards/margins": 0.0054168701171875, "rewards/rejected": -0.0529937744140625, "step": 286 }, { "epoch": 0.15493623051487954, "grad_norm": 0.2680227671163248, "learning_rate": 1.8905805009407586e-05, "log_odds_chosen": 0.28271484375, "log_odds_ratio": -0.5784912109375, "logits/chosen": 0.071929931640625, "logits/rejected": 0.0919342041015625, "logps/chosen": -0.47137451171875, "logps/rejected": -0.572265625, "loss": 9.9663, "nll_loss": 0.4879150390625, "rewards/accuracies": 0.75, "rewards/chosen": -0.047119140625, "rewards/margins": 0.01009368896484375, "rewards/rejected": -0.0572052001953125, "step": 287 }, { "epoch": 0.15547607800796276, "grad_norm": 0.1983199026965137, "learning_rate": 1.889803907974112e-05, "log_odds_chosen": 0.27215576171875, "log_odds_ratio": -0.5711669921875, "logits/chosen": 0.03117656707763672, "logits/rejected": 0.0110626220703125, "logps/chosen": -0.4267578125, "logps/rejected": -0.52447509765625, "loss": 9.0911, "nll_loss": 0.44427490234375, "rewards/accuracies": 0.9375, "rewards/chosen": -0.04270172119140625, "rewards/margins": 0.00972747802734375, "rewards/rejected": -0.05242919921875, "step": 288 }, { "epoch": 0.15601592550104595, "grad_norm": 0.21186161005484117, "learning_rate": 1.8890247295149873e-05, "log_odds_chosen": 0.1968994140625, "log_odds_ratio": -0.6170654296875, "logits/chosen": 0.08782958984375, "logits/rejected": 0.184539794921875, "logps/chosen": -0.55450439453125, "logps/rejected": -0.606689453125, "loss": 9.2539, "nll_loss": 0.563720703125, "rewards/accuracies": 0.8125, "rewards/chosen": -0.055419921875, "rewards/margins": 0.0052490234375, "rewards/rejected": -0.0606689453125, "step": 289 }, { "epoch": 0.15655577299412915, "grad_norm": 0.18556098580748087, "learning_rate": 1.8882429678274338e-05, "log_odds_chosen": 0.1346435546875, "log_odds_ratio": -0.6396484375, "logits/chosen": 0.520111083984375, "logits/rejected": 0.599945068359375, "logps/chosen": -0.5538330078125, "logps/rejected": -0.6082763671875, "loss": 9.2939, "nll_loss": 0.5614013671875, "rewards/accuracies": 0.75, "rewards/chosen": -0.05538177490234375, "rewards/margins": 0.00550079345703125, "rewards/rejected": -0.060882568359375, "step": 290 }, { "epoch": 0.15709562048721237, "grad_norm": 0.24915755847925494, "learning_rate": 1.8874586251830074e-05, "log_odds_chosen": 0.09307861328125, "log_odds_ratio": -0.6517333984375, "logits/chosen": 0.20166015625, "logits/rejected": 0.28350830078125, "logps/chosen": -0.5511474609375, "logps/rejected": -0.5869140625, "loss": 10.7739, "nll_loss": 0.5517578125, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0550689697265625, "rewards/margins": 0.00360107421875, "rewards/rejected": -0.0586700439453125, "step": 291 }, { "epoch": 0.15763546798029557, "grad_norm": 0.20337011083726064, "learning_rate": 1.886671703860763e-05, "log_odds_chosen": 0.170654296875, "log_odds_ratio": -0.61767578125, "logits/chosen": 0.589508056640625, "logits/rejected": 0.59320068359375, "logps/chosen": -0.51025390625, "logps/rejected": -0.5716552734375, "loss": 9.9307, "nll_loss": 0.512451171875, "rewards/accuracies": 0.625, "rewards/chosen": -0.051025390625, "rewards/margins": 0.006134033203125, "rewards/rejected": -0.057159423828125, "step": 292 }, { "epoch": 0.15817531547337876, "grad_norm": 0.27356861140753586, "learning_rate": 1.8858822061472484e-05, "log_odds_chosen": 0.061767578125, "log_odds_ratio": -0.66845703125, "logits/chosen": 0.5326995849609375, "logits/rejected": 0.638092041015625, "logps/chosen": -0.54296875, "logps/rejected": -0.562744140625, "loss": 9.7544, "nll_loss": 0.545654296875, "rewards/accuracies": 0.5, "rewards/chosen": -0.05434417724609375, "rewards/margins": 0.00193023681640625, "rewards/rejected": -0.0562744140625, "step": 293 }, { "epoch": 0.158715162966462, "grad_norm": 0.20288470584512552, "learning_rate": 1.8850901343364977e-05, "log_odds_chosen": 0.0743408203125, "log_odds_ratio": -0.6607666015625, "logits/chosen": 0.2889862060546875, "logits/rejected": 0.4249916076660156, "logps/chosen": -0.4971923828125, "logps/rejected": -0.51708984375, "loss": 9.104, "nll_loss": 0.4993896484375, "rewards/accuracies": 0.6875, "rewards/chosen": -0.04967498779296875, "rewards/margins": 0.00202178955078125, "rewards/rejected": -0.05169677734375, "step": 294 }, { "epoch": 0.15925501045954518, "grad_norm": 0.18822047955853966, "learning_rate": 1.8842954907300236e-05, "log_odds_chosen": 0.0072021484375, "log_odds_ratio": -0.693115234375, "logits/chosen": 0.16986083984375, "logits/rejected": 0.32159423828125, "logps/chosen": -0.5611572265625, "logps/rejected": -0.5447998046875, "loss": 9.2349, "nll_loss": 0.565673828125, "rewards/accuracies": 0.5625, "rewards/chosen": -0.05612945556640625, "rewards/margins": -0.0016326904296875, "rewards/rejected": -0.05449676513671875, "step": 295 }, { "epoch": 0.15979485795262838, "grad_norm": 0.20820263869005795, "learning_rate": 1.8834982776368135e-05, "log_odds_chosen": 0.010498046875, "log_odds_ratio": -0.69091796875, "logits/chosen": 0.56463623046875, "logits/rejected": 0.70867919921875, "logps/chosen": -0.5517578125, "logps/rejected": -0.54638671875, "loss": 10.1257, "nll_loss": 0.5711669921875, "rewards/accuracies": 0.5, "rewards/chosen": -0.05515289306640625, "rewards/margins": -0.00052642822265625, "rewards/rejected": -0.05462646484375, "step": 296 }, { "epoch": 0.16033470544571157, "grad_norm": 0.2043498511873113, "learning_rate": 1.8826984973733197e-05, "log_odds_chosen": 0.00982666015625, "log_odds_ratio": -0.689697265625, "logits/chosen": 0.217987060546875, "logits/rejected": 0.38916015625, "logps/chosen": -0.4844970703125, "logps/rejected": -0.48016357421875, "loss": 9.9556, "nll_loss": 0.49749755859375, "rewards/accuracies": 0.5, "rewards/chosen": -0.0484466552734375, "rewards/margins": -0.00045013427734375, "rewards/rejected": -0.04799652099609375, "step": 297 }, { "epoch": 0.1608745529387948, "grad_norm": 0.20079118912657276, "learning_rate": 1.881896152263453e-05, "log_odds_chosen": 0.01953125, "log_odds_ratio": -0.686279296875, "logits/chosen": 0.417388916015625, "logits/rejected": 0.509735107421875, "logps/chosen": -0.45361328125, "logps/rejected": -0.452880859375, "loss": 10.5801, "nll_loss": 0.4698486328125, "rewards/accuracies": 0.5625, "rewards/chosen": -0.04534912109375, "rewards/margins": -3.814697265625e-05, "rewards/rejected": -0.04531097412109375, "step": 298 }, { "epoch": 0.161414400431878, "grad_norm": 0.19093320927078938, "learning_rate": 1.8810912446385793e-05, "log_odds_chosen": -0.02093505859375, "log_odds_ratio": -0.705322265625, "logits/chosen": -0.0682220458984375, "logits/rejected": 0.06914520263671875, "logps/chosen": -0.4705810546875, "logps/rejected": -0.45562744140625, "loss": 8.8896, "nll_loss": 0.4742431640625, "rewards/accuracies": 0.5, "rewards/chosen": -0.0470428466796875, "rewards/margins": -0.00147247314453125, "rewards/rejected": -0.04557037353515625, "step": 299 }, { "epoch": 0.1619542479249612, "grad_norm": 0.22316383625794725, "learning_rate": 1.8802837768375083e-05, "log_odds_chosen": -0.02978515625, "log_odds_ratio": -0.712646484375, "logits/chosen": 0.21770572662353516, "logits/rejected": 0.47258949279785156, "logps/chosen": -0.518798828125, "logps/rejected": -0.495361328125, "loss": 9.647, "nll_loss": 0.5284423828125, "rewards/accuracies": 0.5625, "rewards/chosen": -0.051910400390625, "rewards/margins": -0.002410888671875, "rewards/rejected": -0.04949951171875, "step": 300 }, { "epoch": 0.1624940954180444, "grad_norm": 0.19448211500590193, "learning_rate": 1.879473751206489e-05, "log_odds_chosen": -0.0157470703125, "log_odds_ratio": -0.701171875, "logits/chosen": 0.2978515625, "logits/rejected": 0.4540863037109375, "logps/chosen": -0.4766845703125, "logps/rejected": -0.4671630859375, "loss": 9.1719, "nll_loss": 0.482421875, "rewards/accuracies": 0.4375, "rewards/chosen": -0.04766845703125, "rewards/margins": -0.00096893310546875, "rewards/rejected": -0.04669952392578125, "step": 301 }, { "epoch": 0.1630339429111276, "grad_norm": 0.1951461878881943, "learning_rate": 1.8786611700992044e-05, "log_odds_chosen": -0.0106201171875, "log_odds_ratio": -0.69873046875, "logits/chosen": 0.3347930908203125, "logits/rejected": 0.40362548828125, "logps/chosen": -0.445556640625, "logps/rejected": -0.4405517578125, "loss": 9.3394, "nll_loss": 0.45849609375, "rewards/accuracies": 0.375, "rewards/chosen": -0.04457855224609375, "rewards/margins": -0.00052642822265625, "rewards/rejected": -0.0440521240234375, "step": 302 }, { "epoch": 0.1635737904042108, "grad_norm": 0.20398745418877434, "learning_rate": 1.8778460358767602e-05, "log_odds_chosen": 0.02783203125, "log_odds_ratio": -0.68115234375, "logits/chosen": 0.43963623046875, "logits/rejected": 0.535919189453125, "logps/chosen": -0.501220703125, "logps/rejected": -0.511962890625, "loss": 9.8062, "nll_loss": 0.5152587890625, "rewards/accuracies": 0.5, "rewards/chosen": -0.05016326904296875, "rewards/margins": 0.00101470947265625, "rewards/rejected": -0.051177978515625, "step": 303 }, { "epoch": 0.16411363789729402, "grad_norm": 0.2083612564171547, "learning_rate": 1.877028350907683e-05, "log_odds_chosen": -0.0811767578125, "log_odds_ratio": -0.737060546875, "logits/chosen": 0.2366790771484375, "logits/rejected": 0.4426116943359375, "logps/chosen": -0.6417236328125, "logps/rejected": -0.583251953125, "loss": 9.8032, "nll_loss": 0.634521484375, "rewards/accuracies": 0.1875, "rewards/chosen": -0.0641632080078125, "rewards/margins": -0.00585174560546875, "rewards/rejected": -0.05831146240234375, "step": 304 }, { "epoch": 0.16465348539037722, "grad_norm": 0.2068375976855463, "learning_rate": 1.8762081175679098e-05, "log_odds_chosen": -0.002197265625, "log_odds_ratio": -0.6943359375, "logits/chosen": 0.19228363037109375, "logits/rejected": 0.2615318298339844, "logps/chosen": -0.444580078125, "logps/rejected": -0.44140625, "loss": 10.0059, "nll_loss": 0.4603271484375, "rewards/accuracies": 0.4375, "rewards/chosen": -0.04443359375, "rewards/margins": -0.0002899169921875, "rewards/rejected": -0.0441436767578125, "step": 305 }, { "epoch": 0.16519333288346041, "grad_norm": 0.18924657894695862, "learning_rate": 1.875385338240783e-05, "log_odds_chosen": -0.0057373046875, "log_odds_ratio": -0.69970703125, "logits/chosen": 0.266815185546875, "logits/rejected": 0.35942554473876953, "logps/chosen": -0.5560302734375, "logps/rejected": -0.53564453125, "loss": 9.1797, "nll_loss": 0.57080078125, "rewards/accuracies": 0.5, "rewards/chosen": -0.05562591552734375, "rewards/margins": -0.002044677734375, "rewards/rejected": -0.05358123779296875, "step": 306 }, { "epoch": 0.16573318037654364, "grad_norm": 0.21752260937114196, "learning_rate": 1.8745600153170417e-05, "log_odds_chosen": -0.00946044921875, "log_odds_ratio": -0.698486328125, "logits/chosen": 0.4881439208984375, "logits/rejected": 0.6822357177734375, "logps/chosen": -0.53680419921875, "logps/rejected": -0.52978515625, "loss": 10.5723, "nll_loss": 0.5574951171875, "rewards/accuracies": 0.375, "rewards/chosen": -0.0536346435546875, "rewards/margins": -0.00067901611328125, "rewards/rejected": -0.05295562744140625, "step": 307 }, { "epoch": 0.16627302786962683, "grad_norm": 0.19942774639468164, "learning_rate": 1.8737321511948175e-05, "log_odds_chosen": -0.00152587890625, "log_odds_ratio": -0.695556640625, "logits/chosen": 0.3368854522705078, "logits/rejected": 0.484588623046875, "logps/chosen": -0.49005126953125, "logps/rejected": -0.48681640625, "loss": 9.4204, "nll_loss": 0.5101318359375, "rewards/accuracies": 0.3125, "rewards/chosen": -0.0490264892578125, "rewards/margins": -0.00034332275390625, "rewards/rejected": -0.04868316650390625, "step": 308 }, { "epoch": 0.16681287536271003, "grad_norm": 0.2028020059463369, "learning_rate": 1.8729017482796254e-05, "log_odds_chosen": 0.00018310546875, "log_odds_ratio": -0.69384765625, "logits/chosen": -0.16436767578125, "logits/rejected": 0.0766754150390625, "logps/chosen": -0.4066162109375, "logps/rejected": -0.40582275390625, "loss": 9.1626, "nll_loss": 0.4102783203125, "rewards/accuracies": 0.4375, "rewards/chosen": -0.0406494140625, "rewards/margins": -7.62939453125e-05, "rewards/rejected": -0.0405731201171875, "step": 309 }, { "epoch": 0.16735272285579325, "grad_norm": 0.22090978061846053, "learning_rate": 1.8720688089843567e-05, "log_odds_chosen": -0.032958984375, "log_odds_ratio": -0.71337890625, "logits/chosen": 0.05124664306640625, "logits/rejected": 0.29038238525390625, "logps/chosen": -0.568359375, "logps/rejected": -0.53564453125, "loss": 10.3608, "nll_loss": 0.5714111328125, "rewards/accuracies": 0.4375, "rewards/chosen": -0.05681610107421875, "rewards/margins": -0.00322723388671875, "rewards/rejected": -0.0535888671875, "step": 310 }, { "epoch": 0.16789257034887645, "grad_norm": 0.1888809685413811, "learning_rate": 1.871233335729273e-05, "log_odds_chosen": 0.0157470703125, "log_odds_ratio": -0.68603515625, "logits/chosen": 0.3563270568847656, "logits/rejected": 0.464447021484375, "logps/chosen": -0.4703369140625, "logps/rejected": -0.47314453125, "loss": 9.4478, "nll_loss": 0.4827880859375, "rewards/accuracies": 0.5, "rewards/chosen": -0.04703521728515625, "rewards/margins": 0.0002899169921875, "rewards/rejected": -0.04732513427734375, "step": 311 }, { "epoch": 0.16843241784195964, "grad_norm": 0.20556830656522496, "learning_rate": 1.8703953309419994e-05, "log_odds_chosen": 0.02227783203125, "log_odds_ratio": -0.682861328125, "logits/chosen": 0.26055908203125, "logits/rejected": 0.37519073486328125, "logps/chosen": -0.49212646484375, "logps/rejected": -0.49462890625, "loss": 10.7471, "nll_loss": 0.500732421875, "rewards/accuracies": 0.5, "rewards/chosen": -0.04921722412109375, "rewards/margins": 0.000244140625, "rewards/rejected": -0.04946136474609375, "step": 312 }, { "epoch": 0.16897226533504284, "grad_norm": 0.2006033168126315, "learning_rate": 1.8695547970575155e-05, "log_odds_chosen": -0.0177001953125, "log_odds_ratio": -0.704345703125, "logits/chosen": 0.186431884765625, "logits/rejected": 0.39068603515625, "logps/chosen": -0.541748046875, "logps/rejected": -0.5150146484375, "loss": 10.3242, "nll_loss": 0.55706787109375, "rewards/accuracies": 0.5, "rewards/chosen": -0.0541839599609375, "rewards/margins": -0.002655029296875, "rewards/rejected": -0.0515289306640625, "step": 313 }, { "epoch": 0.16951211282812606, "grad_norm": 0.1916708113559277, "learning_rate": 1.8687117365181514e-05, "log_odds_chosen": -0.003173828125, "log_odds_ratio": -0.695556640625, "logits/chosen": 0.20752716064453125, "logits/rejected": 0.309906005859375, "logps/chosen": -0.450927734375, "logps/rejected": -0.448486328125, "loss": 9.1909, "nll_loss": 0.458251953125, "rewards/accuracies": 0.375, "rewards/chosen": -0.04511260986328125, "rewards/margins": -0.0002899169921875, "rewards/rejected": -0.04482269287109375, "step": 314 }, { "epoch": 0.17005196032120926, "grad_norm": 0.20463556067935496, "learning_rate": 1.8678661517735768e-05, "log_odds_chosen": 0.01513671875, "log_odds_ratio": -0.68701171875, "logits/chosen": 0.4519500732421875, "logits/rejected": 0.6526260375976562, "logps/chosen": -0.5526123046875, "logps/rejected": -0.5509033203125, "loss": 10.1396, "nll_loss": 0.5611572265625, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0552520751953125, "rewards/margins": -0.0001678466796875, "rewards/rejected": -0.055084228515625, "step": 315 }, { "epoch": 0.17059180781429245, "grad_norm": 0.18430621025953253, "learning_rate": 1.8670180452807982e-05, "log_odds_chosen": 0.0115966796875, "log_odds_ratio": -0.689453125, "logits/chosen": 0.5448760986328125, "logits/rejected": 0.63653564453125, "logps/chosen": -0.510986328125, "logps/rejected": -0.5108642578125, "loss": 9.002, "nll_loss": 0.5216064453125, "rewards/accuracies": 0.375, "rewards/chosen": -0.05108642578125, "rewards/margins": 0.0, "rewards/rejected": -0.05108642578125, "step": 316 }, { "epoch": 0.17113165530737567, "grad_norm": 0.19922362385208037, "learning_rate": 1.8661674195041476e-05, "log_odds_chosen": -0.01202392578125, "log_odds_ratio": -0.701416015625, "logits/chosen": 0.30545806884765625, "logits/rejected": 0.4622650146484375, "logps/chosen": -0.51214599609375, "logps/rejected": -0.506591796875, "loss": 8.9111, "nll_loss": 0.5185546875, "rewards/accuracies": 0.4375, "rewards/chosen": -0.05123138427734375, "rewards/margins": -0.0005645751953125, "rewards/rejected": -0.05066680908203125, "step": 317 }, { "epoch": 0.17167150280045887, "grad_norm": 0.21001307443898595, "learning_rate": 1.8653142769152785e-05, "log_odds_chosen": -0.01202392578125, "log_odds_ratio": -0.701416015625, "logits/chosen": 0.368255615234375, "logits/rejected": 0.5109939575195312, "logps/chosen": -0.5648193359375, "logps/rejected": -0.54473876953125, "loss": 10.605, "nll_loss": 0.574951171875, "rewards/accuracies": 0.4375, "rewards/chosen": -0.05644989013671875, "rewards/margins": -0.00199127197265625, "rewards/rejected": -0.0544586181640625, "step": 318 }, { "epoch": 0.17221135029354206, "grad_norm": 0.19619917480233479, "learning_rate": 1.864458619993157e-05, "log_odds_chosen": 0.085205078125, "log_odds_ratio": -0.6541748046875, "logits/chosen": 0.3424355983734131, "logits/rejected": 0.45700836181640625, "logps/chosen": -0.4627685546875, "logps/rejected": -0.4888916015625, "loss": 9.2573, "nll_loss": 0.4732666015625, "rewards/accuracies": 0.75, "rewards/chosen": -0.0462646484375, "rewards/margins": 0.00262451171875, "rewards/rejected": -0.04888916015625, "step": 319 }, { "epoch": 0.1727511977866253, "grad_norm": 0.18129790989533176, "learning_rate": 1.8636004512240546e-05, "log_odds_chosen": 0.01873779296875, "log_odds_ratio": -0.68603515625, "logits/chosen": 0.19197463989257812, "logits/rejected": 0.41344451904296875, "logps/chosen": -0.58544921875, "logps/rejected": -0.5802001953125, "loss": 9.251, "nll_loss": 0.6075439453125, "rewards/accuracies": 0.6875, "rewards/chosen": -0.058502197265625, "rewards/margins": -0.0004730224609375, "rewards/rejected": -0.0580291748046875, "step": 320 }, { "epoch": 0.17329104527970848, "grad_norm": 0.203912702060569, "learning_rate": 1.8627397731015426e-05, "log_odds_chosen": 0.068359375, "log_odds_ratio": -0.662841796875, "logits/chosen": 0.27752685546875, "logits/rejected": 0.41939544677734375, "logps/chosen": -0.531982421875, "logps/rejected": -0.5601806640625, "loss": 10.2251, "nll_loss": 0.539306640625, "rewards/accuracies": 0.5, "rewards/chosen": -0.0531768798828125, "rewards/margins": 0.0028228759765625, "rewards/rejected": -0.055999755859375, "step": 321 }, { "epoch": 0.17383089277279168, "grad_norm": 0.19868842317678748, "learning_rate": 1.8618765881264828e-05, "log_odds_chosen": 0.114013671875, "log_odds_ratio": -0.6436767578125, "logits/chosen": 0.22381591796875, "logits/rejected": 0.3878631591796875, "logps/chosen": -0.56463623046875, "logps/rejected": -0.5966796875, "loss": 9.187, "nll_loss": 0.56536865234375, "rewards/accuracies": 0.6875, "rewards/chosen": -0.05648040771484375, "rewards/margins": 0.00319671630859375, "rewards/rejected": -0.0596771240234375, "step": 322 }, { "epoch": 0.1743707402658749, "grad_norm": 0.21291910870128816, "learning_rate": 1.8610108988070213e-05, "log_odds_chosen": 0.08306884765625, "log_odds_ratio": -0.655517578125, "logits/chosen": 0.100494384765625, "logits/rejected": 0.22417831420898438, "logps/chosen": -0.46826171875, "logps/rejected": -0.48785400390625, "loss": 9.5225, "nll_loss": 0.478271484375, "rewards/accuracies": 0.75, "rewards/chosen": -0.04686737060546875, "rewards/margins": 0.00193023681640625, "rewards/rejected": -0.048797607421875, "step": 323 }, { "epoch": 0.1749105877589581, "grad_norm": 0.21700816859912708, "learning_rate": 1.860142707658581e-05, "log_odds_chosen": 0.019287109375, "log_odds_ratio": -0.6912841796875, "logits/chosen": 0.365020751953125, "logits/rejected": 0.448577880859375, "logps/chosen": -0.5330810546875, "logps/rejected": -0.533935546875, "loss": 10.0264, "nll_loss": 0.552490234375, "rewards/accuracies": 0.4375, "rewards/chosen": -0.05330657958984375, "rewards/margins": 0.00012969970703125, "rewards/rejected": -0.053436279296875, "step": 324 }, { "epoch": 0.1754504352520413, "grad_norm": 0.20040084002686154, "learning_rate": 1.8592720172038545e-05, "log_odds_chosen": 0.12176513671875, "log_odds_ratio": -0.63720703125, "logits/chosen": 0.7154541015625, "logits/rejected": 0.762542724609375, "logps/chosen": -0.53851318359375, "logps/rejected": -0.5841064453125, "loss": 9.3779, "nll_loss": 0.55126953125, "rewards/accuracies": 0.75, "rewards/chosen": -0.053863525390625, "rewards/margins": 0.00455474853515625, "rewards/rejected": -0.05841827392578125, "step": 325 }, { "epoch": 0.17599028274512452, "grad_norm": 0.22290833216951847, "learning_rate": 1.8583988299727966e-05, "log_odds_chosen": -0.022216796875, "log_odds_ratio": -0.71435546875, "logits/chosen": 0.075225830078125, "logits/rejected": 0.12316131591796875, "logps/chosen": -0.4561767578125, "logps/rejected": -0.4591064453125, "loss": 9.4082, "nll_loss": 0.465576171875, "rewards/accuracies": 0.625, "rewards/chosen": -0.045623779296875, "rewards/margins": 0.0002593994140625, "rewards/rejected": -0.0458831787109375, "step": 326 }, { "epoch": 0.1765301302382077, "grad_norm": 0.2038841014441323, "learning_rate": 1.8575231485026173e-05, "log_odds_chosen": 0.0953369140625, "log_odds_ratio": -0.65234375, "logits/chosen": -0.0264892578125, "logits/rejected": 0.04913330078125, "logps/chosen": -0.4063720703125, "logps/rejected": -0.437255859375, "loss": 9.3062, "nll_loss": 0.4119873046875, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0406341552734375, "rewards/margins": 0.0030975341796875, "rewards/rejected": -0.043731689453125, "step": 327 }, { "epoch": 0.1770699777312909, "grad_norm": 0.20278418994117567, "learning_rate": 1.856644975337773e-05, "log_odds_chosen": 0.080078125, "log_odds_ratio": -0.658203125, "logits/chosen": 0.415069580078125, "logits/rejected": 0.58795166015625, "logps/chosen": -0.588623046875, "logps/rejected": -0.60595703125, "loss": 9.585, "nll_loss": 0.589111328125, "rewards/accuracies": 0.75, "rewards/chosen": -0.0589141845703125, "rewards/margins": 0.001678466796875, "rewards/rejected": -0.0605926513671875, "step": 328 }, { "epoch": 0.1776098252243741, "grad_norm": 0.19608015642908969, "learning_rate": 1.8557643130299615e-05, "log_odds_chosen": 0.1107177734375, "log_odds_ratio": -0.6448974609375, "logits/chosen": 0.23028564453125, "logits/rejected": 0.36358642578125, "logps/chosen": -0.5615234375, "logps/rejected": -0.6014404296875, "loss": 9.2153, "nll_loss": 0.5677490234375, "rewards/accuracies": 0.75, "rewards/chosen": -0.05617523193359375, "rewards/margins": 0.00403594970703125, "rewards/rejected": -0.060211181640625, "step": 329 }, { "epoch": 0.17814967271745732, "grad_norm": 0.20709283066917689, "learning_rate": 1.8548811641381118e-05, "log_odds_chosen": 0.1478271484375, "log_odds_ratio": -0.6268310546875, "logits/chosen": 0.3760223388671875, "logits/rejected": 0.3802337646484375, "logps/chosen": -0.4912109375, "logps/rejected": -0.5438232421875, "loss": 9.7651, "nll_loss": 0.505126953125, "rewards/accuracies": 0.6875, "rewards/chosen": -0.04915618896484375, "rewards/margins": 0.00516510009765625, "rewards/rejected": -0.0543212890625, "step": 330 }, { "epoch": 0.17868952021054052, "grad_norm": 0.21951085667098036, "learning_rate": 1.8539955312283798e-05, "log_odds_chosen": 0.1151123046875, "log_odds_ratio": -0.6416015625, "logits/chosen": 0.31488800048828125, "logits/rejected": 0.35634422302246094, "logps/chosen": -0.439453125, "logps/rejected": -0.4749755859375, "loss": 9.0027, "nll_loss": 0.44256591796875, "rewards/accuracies": 0.8125, "rewards/chosen": -0.04396820068359375, "rewards/margins": 0.00348663330078125, "rewards/rejected": -0.047454833984375, "step": 331 }, { "epoch": 0.17922936770362372, "grad_norm": 0.19239678314325567, "learning_rate": 1.853107416874138e-05, "log_odds_chosen": 0.04248046875, "log_odds_ratio": -0.67431640625, "logits/chosen": 0.270294189453125, "logits/rejected": 0.36177825927734375, "logps/chosen": -0.4708251953125, "logps/rejected": -0.4842529296875, "loss": 8.5708, "nll_loss": 0.4755859375, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0471038818359375, "rewards/margins": 0.0012969970703125, "rewards/rejected": -0.04840087890625, "step": 332 }, { "epoch": 0.17976921519670694, "grad_norm": 0.18391923555160178, "learning_rate": 1.8522168236559693e-05, "log_odds_chosen": 0.1397705078125, "log_odds_ratio": -0.62744140625, "logits/chosen": 0.39788818359375, "logits/rejected": 0.48554420471191406, "logps/chosen": -0.4464111328125, "logps/rejected": -0.49169921875, "loss": 8.8672, "nll_loss": 0.4564208984375, "rewards/accuracies": 0.75, "rewards/chosen": -0.04463958740234375, "rewards/margins": 0.00453948974609375, "rewards/rejected": -0.0491790771484375, "step": 333 }, { "epoch": 0.18030906268979013, "grad_norm": 0.21101988327273596, "learning_rate": 1.8513237541616602e-05, "log_odds_chosen": 0.02276611328125, "log_odds_ratio": -0.68798828125, "logits/chosen": 0.1998291015625, "logits/rejected": 0.33197021484375, "logps/chosen": -0.5406494140625, "logps/rejected": -0.53857421875, "loss": 9.6392, "nll_loss": 0.546142578125, "rewards/accuracies": 0.4375, "rewards/chosen": -0.0540008544921875, "rewards/margins": -0.00011444091796875, "rewards/rejected": -0.05388641357421875, "step": 334 }, { "epoch": 0.18084891018287333, "grad_norm": 0.1914980635784608, "learning_rate": 1.850428210986192e-05, "log_odds_chosen": 0.1123046875, "log_odds_ratio": -0.64453125, "logits/chosen": 0.38550567626953125, "logits/rejected": 0.48797607421875, "logps/chosen": -0.45477294921875, "logps/rejected": -0.4849853515625, "loss": 9.4961, "nll_loss": 0.4697265625, "rewards/accuracies": 0.5, "rewards/chosen": -0.04544830322265625, "rewards/margins": 0.00305938720703125, "rewards/rejected": -0.0485076904296875, "step": 335 }, { "epoch": 0.18138875767595655, "grad_norm": 0.19764425193311705, "learning_rate": 1.8495301967317333e-05, "log_odds_chosen": 0.1298828125, "log_odds_ratio": -0.6336669921875, "logits/chosen": 0.2918357849121094, "logits/rejected": 0.441619873046875, "logps/chosen": -0.48065185546875, "logps/rejected": -0.5213623046875, "loss": 9.5195, "nll_loss": 0.4918212890625, "rewards/accuracies": 0.8125, "rewards/chosen": -0.04807281494140625, "rewards/margins": 0.00405120849609375, "rewards/rejected": -0.0521240234375, "step": 336 }, { "epoch": 0.18192860516903975, "grad_norm": 0.2010401809970856, "learning_rate": 1.848629714007633e-05, "log_odds_chosen": 0.0684814453125, "log_odds_ratio": -0.662109375, "logits/chosen": 0.458709716796875, "logits/rejected": 0.5556755065917969, "logps/chosen": -0.5062255859375, "logps/rejected": -0.52734375, "loss": 8.8032, "nll_loss": 0.514892578125, "rewards/accuracies": 0.5625, "rewards/chosen": -0.050628662109375, "rewards/margins": 0.0021209716796875, "rewards/rejected": -0.0527496337890625, "step": 337 }, { "epoch": 0.18246845266212294, "grad_norm": 0.18086936885493893, "learning_rate": 1.8477267654304136e-05, "log_odds_chosen": 0.0731201171875, "log_odds_ratio": -0.6591796875, "logits/chosen": 0.5045166015625, "logits/rejected": 0.5714111328125, "logps/chosen": -0.516845703125, "logps/rejected": -0.538330078125, "loss": 8.6035, "nll_loss": 0.523193359375, "rewards/accuracies": 0.75, "rewards/chosen": -0.051666259765625, "rewards/margins": 0.0021820068359375, "rewards/rejected": -0.0538482666015625, "step": 338 }, { "epoch": 0.18300830015520617, "grad_norm": 0.2047340744084215, "learning_rate": 1.846821353623762e-05, "log_odds_chosen": 0.1622314453125, "log_odds_ratio": -0.618408203125, "logits/chosen": 0.09195709228515625, "logits/rejected": 0.13402032852172852, "logps/chosen": -0.39984130859375, "logps/rejected": -0.451416015625, "loss": 9.6621, "nll_loss": 0.41015625, "rewards/accuracies": 0.9375, "rewards/chosen": -0.03998565673828125, "rewards/margins": 0.00514984130859375, "rewards/rejected": -0.045135498046875, "step": 339 }, { "epoch": 0.18354814764828936, "grad_norm": 0.20661961448835933, "learning_rate": 1.845913481218521e-05, "log_odds_chosen": 0.12451171875, "log_odds_ratio": -0.634033203125, "logits/chosen": 0.2474365234375, "logits/rejected": 0.34996795654296875, "logps/chosen": -0.4383544921875, "logps/rejected": -0.481201171875, "loss": 9.2266, "nll_loss": 0.4527587890625, "rewards/accuracies": 0.9375, "rewards/chosen": -0.04384613037109375, "rewards/margins": 0.00431060791015625, "rewards/rejected": -0.04815673828125, "step": 340 }, { "epoch": 0.18408799514137256, "grad_norm": 0.19774068861454175, "learning_rate": 1.845003150852686e-05, "log_odds_chosen": 0.1385498046875, "log_odds_ratio": -0.630126953125, "logits/chosen": 0.310302734375, "logits/rejected": 0.4228808879852295, "logps/chosen": -0.44879150390625, "logps/rejected": -0.494384765625, "loss": 8.9077, "nll_loss": 0.45849609375, "rewards/accuracies": 0.75, "rewards/chosen": -0.0448455810546875, "rewards/margins": 0.004608154296875, "rewards/rejected": -0.0494537353515625, "step": 341 }, { "epoch": 0.18462784263445578, "grad_norm": 0.20271057888455996, "learning_rate": 1.8440903651713922e-05, "log_odds_chosen": 0.05548095703125, "log_odds_ratio": -0.668212890625, "logits/chosen": 0.27069091796875, "logits/rejected": 0.3415985107421875, "logps/chosen": -0.6036376953125, "logps/rejected": -0.62890625, "loss": 9.3188, "nll_loss": 0.6055908203125, "rewards/accuracies": 0.625, "rewards/chosen": -0.0603485107421875, "rewards/margins": 0.002532958984375, "rewards/rejected": -0.0628814697265625, "step": 342 }, { "epoch": 0.18516769012753898, "grad_norm": 0.1849643161201775, "learning_rate": 1.8431751268269105e-05, "log_odds_chosen": 0.1541748046875, "log_odds_ratio": -0.6231689453125, "logits/chosen": 0.39531707763671875, "logits/rejected": 0.4768218994140625, "logps/chosen": -0.46258544921875, "logps/rejected": -0.510009765625, "loss": 8.8687, "nll_loss": 0.46734619140625, "rewards/accuracies": 0.6875, "rewards/chosen": -0.04627227783203125, "rewards/margins": 0.004730224609375, "rewards/rejected": -0.05100250244140625, "step": 343 }, { "epoch": 0.18570753762062217, "grad_norm": 0.1814463771397611, "learning_rate": 1.8422574384786374e-05, "log_odds_chosen": 0.014404296875, "log_odds_ratio": -0.69140625, "logits/chosen": 0.32860565185546875, "logits/rejected": 0.5153656005859375, "logps/chosen": -0.5931396484375, "logps/rejected": -0.5784912109375, "loss": 10.3481, "nll_loss": 0.6038818359375, "rewards/accuracies": 0.5, "rewards/chosen": -0.0593414306640625, "rewards/margins": -0.00152587890625, "rewards/rejected": -0.0578155517578125, "step": 344 }, { "epoch": 0.18624738511370537, "grad_norm": 0.214125172321062, "learning_rate": 1.841337302793089e-05, "log_odds_chosen": 0.099853515625, "log_odds_ratio": -0.64599609375, "logits/chosen": 0.462066650390625, "logits/rejected": 0.569793701171875, "logps/chosen": -0.4864501953125, "logps/rejected": -0.5164794921875, "loss": 9.5527, "nll_loss": 0.4908447265625, "rewards/accuracies": 0.625, "rewards/chosen": -0.04866790771484375, "rewards/margins": 0.00296783447265625, "rewards/rejected": -0.0516357421875, "step": 345 }, { "epoch": 0.1867872326067886, "grad_norm": 0.1956198927418159, "learning_rate": 1.8404147224438924e-05, "log_odds_chosen": 0.1488037109375, "log_odds_ratio": -0.624267578125, "logits/chosen": 0.5089263916015625, "logits/rejected": 0.5581932067871094, "logps/chosen": -0.4708251953125, "logps/rejected": -0.519287109375, "loss": 8.6875, "nll_loss": 0.47845458984375, "rewards/accuracies": 0.75, "rewards/chosen": -0.0471038818359375, "rewards/margins": 0.00478363037109375, "rewards/rejected": -0.05188751220703125, "step": 346 }, { "epoch": 0.18732708009987178, "grad_norm": 0.2036627714594957, "learning_rate": 1.839489700111778e-05, "log_odds_chosen": 0.1298828125, "log_odds_ratio": -0.63427734375, "logits/chosen": 0.056400299072265625, "logits/rejected": 0.1550140380859375, "logps/chosen": -0.46917724609375, "logps/rejected": -0.50848388671875, "loss": 9.2188, "nll_loss": 0.47088623046875, "rewards/accuracies": 0.6875, "rewards/chosen": -0.046966552734375, "rewards/margins": 0.003875732421875, "rewards/rejected": -0.05084228515625, "step": 347 }, { "epoch": 0.18786692759295498, "grad_norm": 0.18880885311629367, "learning_rate": 1.8385622384845726e-05, "log_odds_chosen": 0.11279296875, "log_odds_ratio": -0.64306640625, "logits/chosen": 0.4391365051269531, "logits/rejected": 0.43121337890625, "logps/chosen": -0.4857177734375, "logps/rejected": -0.5194091796875, "loss": 9.1675, "nll_loss": 0.49053955078125, "rewards/accuracies": 0.6875, "rewards/chosen": -0.04859161376953125, "rewards/margins": 0.00336456298828125, "rewards/rejected": -0.0519561767578125, "step": 348 }, { "epoch": 0.1884067750860382, "grad_norm": 0.20244389781718145, "learning_rate": 1.83763234025719e-05, "log_odds_chosen": -0.0118408203125, "log_odds_ratio": -0.7125244140625, "logits/chosen": 0.4073600769042969, "logits/rejected": 0.462890625, "logps/chosen": -0.6387939453125, "logps/rejected": -0.6033935546875, "loss": 10.1929, "nll_loss": 0.6419677734375, "rewards/accuracies": 0.6875, "rewards/chosen": -0.06383514404296875, "rewards/margins": -0.00353240966796875, "rewards/rejected": -0.060302734375, "step": 349 }, { "epoch": 0.1889466225791214, "grad_norm": 0.21291703668996345, "learning_rate": 1.836700008131624e-05, "log_odds_chosen": 0.170166015625, "log_odds_ratio": -0.6163330078125, "logits/chosen": 0.18660736083984375, "logits/rejected": 0.29283714294433594, "logps/chosen": -0.54638671875, "logps/rejected": -0.62353515625, "loss": 9.9175, "nll_loss": 0.5516357421875, "rewards/accuracies": 0.75, "rewards/chosen": -0.0546417236328125, "rewards/margins": 0.007720947265625, "rewards/rejected": -0.0623626708984375, "step": 350 }, { "epoch": 0.1894864700722046, "grad_norm": 0.1846117255178902, "learning_rate": 1.835765244816941e-05, "log_odds_chosen": 0.130859375, "log_odds_ratio": -0.6334228515625, "logits/chosen": 0.3630709648132324, "logits/rejected": 0.4360980987548828, "logps/chosen": -0.459228515625, "logps/rejected": -0.5015869140625, "loss": 9.8486, "nll_loss": 0.465087890625, "rewards/accuracies": 0.625, "rewards/chosen": -0.04595947265625, "rewards/margins": 0.0041961669921875, "rewards/rejected": -0.0501556396484375, "step": 351 }, { "epoch": 0.19002631756528782, "grad_norm": 0.18796416241156377, "learning_rate": 1.8348280530292712e-05, "log_odds_chosen": 0.1055908203125, "log_odds_ratio": -0.6470947265625, "logits/chosen": 0.37744140625, "logits/rejected": 0.5013427734375, "logps/chosen": -0.51123046875, "logps/rejected": -0.543212890625, "loss": 9.2036, "nll_loss": 0.51556396484375, "rewards/accuracies": 0.5625, "rewards/chosen": -0.05113983154296875, "rewards/margins": 0.00316619873046875, "rewards/rejected": -0.0543060302734375, "step": 352 }, { "epoch": 0.190566165058371, "grad_norm": 0.18970454551514568, "learning_rate": 1.8338884354918024e-05, "log_odds_chosen": 0.072509765625, "log_odds_ratio": -0.664306640625, "logits/chosen": 0.4402008056640625, "logits/rejected": 0.4995880126953125, "logps/chosen": -0.4774169921875, "logps/rejected": -0.50616455078125, "loss": 9.3022, "nll_loss": 0.4854736328125, "rewards/accuracies": 0.625, "rewards/chosen": -0.04773712158203125, "rewards/margins": 0.00286102294921875, "rewards/rejected": -0.05059814453125, "step": 353 }, { "epoch": 0.1911060125514542, "grad_norm": 0.2032179887003747, "learning_rate": 1.8329463949347687e-05, "log_odds_chosen": 0.091552734375, "log_odds_ratio": -0.65087890625, "logits/chosen": 0.4312896728515625, "logits/rejected": 0.5533905029296875, "logps/chosen": -0.4910888671875, "logps/rejected": -0.5216064453125, "loss": 9.6499, "nll_loss": 0.5018310546875, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0490570068359375, "rewards/margins": 0.0030670166015625, "rewards/rejected": -0.0521240234375, "step": 354 }, { "epoch": 0.19164586004453743, "grad_norm": 0.18807752234515487, "learning_rate": 1.8320019340954467e-05, "log_odds_chosen": 0.1923828125, "log_odds_ratio": -0.6090087890625, "logits/chosen": 0.25174713134765625, "logits/rejected": 0.23696136474609375, "logps/chosen": -0.4500732421875, "logps/rejected": -0.508056640625, "loss": 9.8896, "nll_loss": 0.45135498046875, "rewards/accuracies": 0.5625, "rewards/chosen": -0.04503631591796875, "rewards/margins": 0.00580596923828125, "rewards/rejected": -0.05084228515625, "step": 355 }, { "epoch": 0.19218570753762063, "grad_norm": 0.18496845802858336, "learning_rate": 1.831055055718145e-05, "log_odds_chosen": 0.1107177734375, "log_odds_ratio": -0.643310546875, "logits/chosen": 0.45276689529418945, "logits/rejected": 0.46619415283203125, "logps/chosen": -0.531982421875, "logps/rejected": -0.568359375, "loss": 9.4927, "nll_loss": 0.5396728515625, "rewards/accuracies": 0.75, "rewards/chosen": -0.05323028564453125, "rewards/margins": 0.00365447998046875, "rewards/rejected": -0.056884765625, "step": 356 }, { "epoch": 0.19272555503070382, "grad_norm": 0.2071876666451634, "learning_rate": 1.8301057625541962e-05, "log_odds_chosen": 0.1539306640625, "log_odds_ratio": -0.6234130859375, "logits/chosen": 0.5481147766113281, "logits/rejected": 0.6039352416992188, "logps/chosen": -0.4857177734375, "logps/rejected": -0.53515625, "loss": 9.6665, "nll_loss": 0.49444580078125, "rewards/accuracies": 0.625, "rewards/chosen": -0.0485382080078125, "rewards/margins": 0.00495147705078125, "rewards/rejected": -0.05348968505859375, "step": 357 }, { "epoch": 0.19326540252378702, "grad_norm": 0.21251328178304885, "learning_rate": 1.8291540573619503e-05, "log_odds_chosen": 0.0164794921875, "log_odds_ratio": -0.68896484375, "logits/chosen": 0.4047698974609375, "logits/rejected": 0.567718505859375, "logps/chosen": -0.6497802734375, "logps/rejected": -0.6417236328125, "loss": 10.3994, "nll_loss": 0.6549072265625, "rewards/accuracies": 0.4375, "rewards/chosen": -0.06502532958984375, "rewards/margins": -0.00087738037109375, "rewards/rejected": -0.06414794921875, "step": 358 }, { "epoch": 0.19380525001687024, "grad_norm": 0.19246201755979053, "learning_rate": 1.8281999429067653e-05, "log_odds_chosen": 0.1688232421875, "log_odds_ratio": -0.6243896484375, "logits/chosen": 0.2150421142578125, "logits/rejected": 0.26636505126953125, "logps/chosen": -0.4635009765625, "logps/rejected": -0.514892578125, "loss": 9.2681, "nll_loss": 0.470703125, "rewards/accuracies": 0.625, "rewards/chosen": -0.0463409423828125, "rewards/margins": 0.005157470703125, "rewards/rejected": -0.0514984130859375, "step": 359 }, { "epoch": 0.19434509750995344, "grad_norm": 0.2039755691789655, "learning_rate": 1.8272434219610003e-05, "log_odds_chosen": 0.1763916015625, "log_odds_ratio": -0.6142578125, "logits/chosen": 0.394195556640625, "logits/rejected": 0.39483642578125, "logps/chosen": -0.6297607421875, "logps/rejected": -0.724365234375, "loss": 9.7822, "nll_loss": 0.6298828125, "rewards/accuracies": 0.75, "rewards/chosen": -0.06304931640625, "rewards/margins": 0.0094451904296875, "rewards/rejected": -0.0724945068359375, "step": 360 }, { "epoch": 0.19488494500303663, "grad_norm": 0.18238072406362416, "learning_rate": 1.8262844973040067e-05, "log_odds_chosen": 0.0989990234375, "log_odds_ratio": -0.653564453125, "logits/chosen": 0.2412109375, "logits/rejected": 0.36834716796875, "logps/chosen": -0.475830078125, "logps/rejected": -0.50732421875, "loss": 9.2979, "nll_loss": 0.4810791015625, "rewards/accuracies": 0.625, "rewards/chosen": -0.04759979248046875, "rewards/margins": 0.003143310546875, "rewards/rejected": -0.05074310302734375, "step": 361 }, { "epoch": 0.19542479249611985, "grad_norm": 0.247784336639198, "learning_rate": 1.8253231717221206e-05, "log_odds_chosen": 0.1611328125, "log_odds_ratio": -0.6239013671875, "logits/chosen": 0.36627197265625, "logits/rejected": 0.4056243896484375, "logps/chosen": -0.47412109375, "logps/rejected": -0.5372314453125, "loss": 9.626, "nll_loss": 0.48974609375, "rewards/accuracies": 0.75, "rewards/chosen": -0.04740142822265625, "rewards/margins": 0.00632476806640625, "rewards/rejected": -0.0537261962890625, "step": 362 }, { "epoch": 0.19596463998920305, "grad_norm": 0.19973897148254063, "learning_rate": 1.8243594480086534e-05, "log_odds_chosen": 0.013671875, "log_odds_ratio": -0.689208984375, "logits/chosen": 0.5231838226318359, "logits/rejected": 0.6620864868164062, "logps/chosen": -0.50543212890625, "logps/rejected": -0.5040283203125, "loss": 9.7559, "nll_loss": 0.51611328125, "rewards/accuracies": 0.375, "rewards/chosen": -0.0505523681640625, "rewards/margins": -0.0001373291015625, "rewards/rejected": -0.0504150390625, "step": 363 }, { "epoch": 0.19650448748228624, "grad_norm": 0.21784484165450743, "learning_rate": 1.8233933289638856e-05, "log_odds_chosen": -0.07647705078125, "log_odds_ratio": -0.7432861328125, "logits/chosen": 0.567779541015625, "logits/rejected": 0.76239013671875, "logps/chosen": -0.88214111328125, "logps/rejected": -0.8048095703125, "loss": 11.6372, "nll_loss": 0.895263671875, "rewards/accuracies": 0.25, "rewards/chosen": -0.088165283203125, "rewards/margins": -0.0077056884765625, "rewards/rejected": -0.0804595947265625, "step": 364 }, { "epoch": 0.19704433497536947, "grad_norm": 0.19570879272262792, "learning_rate": 1.822424817395058e-05, "log_odds_chosen": 0.022705078125, "log_odds_ratio": -0.6905517578125, "logits/chosen": 0.5746307373046875, "logits/rejected": 0.6804962158203125, "logps/chosen": -0.48992919921875, "logps/rejected": -0.493896484375, "loss": 9.6724, "nll_loss": 0.50396728515625, "rewards/accuracies": 0.5, "rewards/chosen": -0.048980712890625, "rewards/margins": 0.0004119873046875, "rewards/rejected": -0.0493927001953125, "step": 365 }, { "epoch": 0.19758418246845266, "grad_norm": 0.17920997008274173, "learning_rate": 1.821453916116363e-05, "log_odds_chosen": 0.1185302734375, "log_odds_ratio": -0.6405029296875, "logits/chosen": 0.275909423828125, "logits/rejected": 0.38970947265625, "logps/chosen": -0.4681396484375, "logps/rejected": -0.5032958984375, "loss": 8.9097, "nll_loss": 0.4884033203125, "rewards/accuracies": 0.6875, "rewards/chosen": -0.046844482421875, "rewards/margins": 0.00347137451171875, "rewards/rejected": -0.05031585693359375, "step": 366 }, { "epoch": 0.19812402996153586, "grad_norm": 0.20046048314367682, "learning_rate": 1.8204806279489365e-05, "log_odds_chosen": 0.0672607421875, "log_odds_ratio": -0.6656494140625, "logits/chosen": 0.09539794921875, "logits/rejected": 0.28606414794921875, "logps/chosen": -0.5692138671875, "logps/rejected": -0.57080078125, "loss": 9.5483, "nll_loss": 0.57000732421875, "rewards/accuracies": 0.6875, "rewards/chosen": -0.056884765625, "rewards/margins": 0.00018310546875, "rewards/rejected": -0.05706787109375, "step": 367 }, { "epoch": 0.19866387745461908, "grad_norm": 0.20802737436967286, "learning_rate": 1.8195049557208503e-05, "log_odds_chosen": 0.08740234375, "log_odds_ratio": -0.6533203125, "logits/chosen": 0.3693356513977051, "logits/rejected": 0.5220947265625, "logps/chosen": -0.5152587890625, "logps/rejected": -0.5445556640625, "loss": 8.8052, "nll_loss": 0.5205078125, "rewards/accuracies": 0.625, "rewards/chosen": -0.051544189453125, "rewards/margins": 0.0029449462890625, "rewards/rejected": -0.0544891357421875, "step": 368 }, { "epoch": 0.19920372494770228, "grad_norm": 0.2008498658383722, "learning_rate": 1.8185269022671035e-05, "log_odds_chosen": -0.017333984375, "log_odds_ratio": -0.708740234375, "logits/chosen": 0.23699951171875, "logits/rejected": 0.3961181640625, "logps/chosen": -0.638916015625, "logps/rejected": -0.5986328125, "loss": 9.522, "nll_loss": 0.6416015625, "rewards/accuracies": 0.625, "rewards/chosen": -0.0638885498046875, "rewards/margins": -0.004058837890625, "rewards/rejected": -0.0598297119140625, "step": 369 }, { "epoch": 0.19974357244078547, "grad_norm": 0.17864865668927188, "learning_rate": 1.8175464704296143e-05, "log_odds_chosen": 0.06689453125, "log_odds_ratio": -0.6630859375, "logits/chosen": 0.428619384765625, "logits/rejected": 0.535125732421875, "logps/chosen": -0.4833984375, "logps/rejected": -0.502197265625, "loss": 8.8096, "nll_loss": 0.4976806640625, "rewards/accuracies": 0.5625, "rewards/chosen": -0.04833984375, "rewards/margins": 0.001861572265625, "rewards/rejected": -0.050201416015625, "step": 370 }, { "epoch": 0.2002834199338687, "grad_norm": 0.17934389183578475, "learning_rate": 1.816563663057211e-05, "log_odds_chosen": 0.06622314453125, "log_odds_ratio": -0.6644287109375, "logits/chosen": 0.4371185302734375, "logits/rejected": 0.51080322265625, "logps/chosen": -0.502685546875, "logps/rejected": -0.52325439453125, "loss": 8.7622, "nll_loss": 0.5081787109375, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0502777099609375, "rewards/margins": 0.0020751953125, "rewards/rejected": -0.0523529052734375, "step": 371 }, { "epoch": 0.2008232674269519, "grad_norm": 0.20236780122893552, "learning_rate": 1.815578483005626e-05, "log_odds_chosen": 0.0655517578125, "log_odds_ratio": -0.665283203125, "logits/chosen": 0.575103759765625, "logits/rejected": 0.652587890625, "logps/chosen": -0.52069091796875, "logps/rejected": -0.534423828125, "loss": 10.4121, "nll_loss": 0.53790283203125, "rewards/accuracies": 0.5, "rewards/chosen": -0.052093505859375, "rewards/margins": 0.0013275146484375, "rewards/rejected": -0.0534210205078125, "step": 372 }, { "epoch": 0.20136311492003509, "grad_norm": 0.1876024552089345, "learning_rate": 1.8145909331374844e-05, "log_odds_chosen": 0.027099609375, "log_odds_ratio": -0.682861328125, "logits/chosen": 0.3748779296875, "logits/rejected": 0.44879150390625, "logps/chosen": -0.4849853515625, "logps/rejected": -0.4862060546875, "loss": 9.2959, "nll_loss": 0.4967041015625, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0484771728515625, "rewards/margins": 0.0001220703125, "rewards/rejected": -0.0485992431640625, "step": 373 }, { "epoch": 0.20190296241311828, "grad_norm": 0.19240615776845776, "learning_rate": 1.8136010163222982e-05, "log_odds_chosen": 0.13018798828125, "log_odds_ratio": -0.6334228515625, "logits/chosen": 0.3206329345703125, "logits/rejected": 0.4482421875, "logps/chosen": -0.45562744140625, "logps/rejected": -0.4949951171875, "loss": 9.6392, "nll_loss": 0.4615478515625, "rewards/accuracies": 0.75, "rewards/chosen": -0.045562744140625, "rewards/margins": 0.0039215087890625, "rewards/rejected": -0.0494842529296875, "step": 374 }, { "epoch": 0.2024428099062015, "grad_norm": 0.18133795872473607, "learning_rate": 1.812608735436457e-05, "log_odds_chosen": -0.0096435546875, "log_odds_ratio": -0.70166015625, "logits/chosen": 0.524932861328125, "logits/rejected": 0.6079940795898438, "logps/chosen": -0.5125732421875, "logps/rejected": -0.497314453125, "loss": 8.9473, "nll_loss": 0.5203857421875, "rewards/accuracies": 0.375, "rewards/chosen": -0.05126953125, "rewards/margins": -0.00157928466796875, "rewards/rejected": -0.04969024658203125, "step": 375 }, { "epoch": 0.2029826573992847, "grad_norm": 0.177975033374836, "learning_rate": 1.811614093363219e-05, "log_odds_chosen": 0.0972900390625, "log_odds_ratio": -0.6507568359375, "logits/chosen": 0.23065185546875, "logits/rejected": 0.3099822998046875, "logps/chosen": -0.43524169921875, "logps/rejected": -0.4615478515625, "loss": 8.7271, "nll_loss": 0.44232177734375, "rewards/accuracies": 0.625, "rewards/chosen": -0.04351806640625, "rewards/margins": 0.00263214111328125, "rewards/rejected": -0.04615020751953125, "step": 376 }, { "epoch": 0.2035225048923679, "grad_norm": 0.1902338303902949, "learning_rate": 1.8106170929927035e-05, "log_odds_chosen": -0.1031494140625, "log_odds_ratio": -0.756591796875, "logits/chosen": 0.32779693603515625, "logits/rejected": 0.421051025390625, "logps/chosen": -0.6302490234375, "logps/rejected": -0.5743408203125, "loss": 9.4976, "nll_loss": 0.635986328125, "rewards/accuracies": 0.5, "rewards/chosen": -0.0630340576171875, "rewards/margins": -0.0055694580078125, "rewards/rejected": -0.057464599609375, "step": 377 }, { "epoch": 0.20406235238545112, "grad_norm": 0.22886504095087704, "learning_rate": 1.8096177372218835e-05, "log_odds_chosen": 0.10369873046875, "log_odds_ratio": -0.6505126953125, "logits/chosen": 0.29345703125, "logits/rejected": 0.2894096374511719, "logps/chosen": -0.681884765625, "logps/rejected": -0.6947021484375, "loss": 10.0513, "nll_loss": 0.6773681640625, "rewards/accuracies": 0.8125, "rewards/chosen": -0.0682373046875, "rewards/margins": 0.0012664794921875, "rewards/rejected": -0.0695037841796875, "step": 378 }, { "epoch": 0.2046021998785343, "grad_norm": 0.1974666053156819, "learning_rate": 1.8086160289545738e-05, "log_odds_chosen": 0.1494140625, "log_odds_ratio": -0.6265869140625, "logits/chosen": 0.22914886474609375, "logits/rejected": 0.27874755859375, "logps/chosen": -0.4359130859375, "logps/rejected": -0.4766845703125, "loss": 9.5728, "nll_loss": 0.4403076171875, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0435638427734375, "rewards/margins": 0.00408935546875, "rewards/rejected": -0.0476531982421875, "step": 379 }, { "epoch": 0.2051420473716175, "grad_norm": 0.18546771570977305, "learning_rate": 1.8076119711014265e-05, "log_odds_chosen": 0.1158447265625, "log_odds_ratio": -0.64208984375, "logits/chosen": 0.452606201171875, "logits/rejected": 0.5120315551757812, "logps/chosen": -0.49658203125, "logps/rejected": -0.5318603515625, "loss": 9.1836, "nll_loss": 0.50146484375, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0496673583984375, "rewards/margins": 0.0035247802734375, "rewards/rejected": -0.053192138671875, "step": 380 }, { "epoch": 0.20568189486470073, "grad_norm": 0.1908663288531799, "learning_rate": 1.8066055665799203e-05, "log_odds_chosen": 0.0482177734375, "log_odds_ratio": -0.6749267578125, "logits/chosen": 0.2927703857421875, "logits/rejected": 0.4131889343261719, "logps/chosen": -0.497802734375, "logps/rejected": -0.510009765625, "loss": 8.8599, "nll_loss": 0.5177001953125, "rewards/accuracies": 0.625, "rewards/chosen": -0.0497894287109375, "rewards/margins": 0.0012054443359375, "rewards/rejected": -0.050994873046875, "step": 381 }, { "epoch": 0.20622174235778393, "grad_norm": 0.19206064488923785, "learning_rate": 1.805596818314353e-05, "log_odds_chosen": 0.053466796875, "log_odds_ratio": -0.6785888671875, "logits/chosen": 0.1192779541015625, "logits/rejected": 0.3354339599609375, "logps/chosen": -0.5408935546875, "logps/rejected": -0.54931640625, "loss": 9.2324, "nll_loss": 0.545166015625, "rewards/accuracies": 0.625, "rewards/chosen": -0.05408477783203125, "rewards/margins": 0.00083160400390625, "rewards/rejected": -0.0549163818359375, "step": 382 }, { "epoch": 0.20676158985086712, "grad_norm": 0.2031749679839212, "learning_rate": 1.804585729235832e-05, "log_odds_chosen": 0.118408203125, "log_odds_ratio": -0.6463623046875, "logits/chosen": -0.017313003540039062, "logits/rejected": 0.05204010009765625, "logps/chosen": -0.54754638671875, "logps/rejected": -0.56170654296875, "loss": 9.7249, "nll_loss": 0.54583740234375, "rewards/accuracies": 0.6875, "rewards/chosen": -0.05477142333984375, "rewards/margins": 0.00141143798828125, "rewards/rejected": -0.056182861328125, "step": 383 }, { "epoch": 0.20730143734395035, "grad_norm": 0.18863954742896796, "learning_rate": 1.8035723022822663e-05, "log_odds_chosen": 0.198974609375, "log_odds_ratio": -0.6025390625, "logits/chosen": 0.1729888916015625, "logits/rejected": 0.21147632598876953, "logps/chosen": -0.4285888671875, "logps/rejected": -0.4954833984375, "loss": 8.1699, "nll_loss": 0.4293212890625, "rewards/accuracies": 0.8125, "rewards/chosen": -0.04286956787109375, "rewards/margins": 0.00665283203125, "rewards/rejected": -0.04952239990234375, "step": 384 }, { "epoch": 0.20784128483703354, "grad_norm": 0.17377776402371758, "learning_rate": 1.8025565403983592e-05, "log_odds_chosen": 0.1710205078125, "log_odds_ratio": -0.6173095703125, "logits/chosen": 0.0717926025390625, "logits/rejected": 0.1530928611755371, "logps/chosen": -0.3997802734375, "logps/rejected": -0.4541015625, "loss": 8.374, "nll_loss": 0.4085693359375, "rewards/accuracies": 0.75, "rewards/chosen": -0.03997802734375, "rewards/margins": 0.00545501708984375, "rewards/rejected": -0.04543304443359375, "step": 385 }, { "epoch": 0.20838113233011674, "grad_norm": 0.2026428185668976, "learning_rate": 1.801538446535597e-05, "log_odds_chosen": 0.13916015625, "log_odds_ratio": -0.629150390625, "logits/chosen": 0.13873291015625, "logits/rejected": 0.201690673828125, "logps/chosen": -0.40740966796875, "logps/rejected": -0.4500732421875, "loss": 9.1079, "nll_loss": 0.41168212890625, "rewards/accuracies": 0.75, "rewards/chosen": -0.040740966796875, "rewards/margins": 0.0042572021484375, "rewards/rejected": -0.0449981689453125, "step": 386 }, { "epoch": 0.20892097982319996, "grad_norm": 0.19373069840882304, "learning_rate": 1.800518023652243e-05, "log_odds_chosen": 0.0419921875, "log_odds_ratio": -0.6767578125, "logits/chosen": 0.30582427978515625, "logits/rejected": 0.37689208984375, "logps/chosen": -0.6197509765625, "logps/rejected": -0.6175537109375, "loss": 9.4062, "nll_loss": 0.6396484375, "rewards/accuracies": 0.4375, "rewards/chosen": -0.06198883056640625, "rewards/margins": -0.00017547607421875, "rewards/rejected": -0.0618133544921875, "step": 387 }, { "epoch": 0.20946082731628315, "grad_norm": 0.20630151725516455, "learning_rate": 1.799495274713328e-05, "log_odds_chosen": 0.1494140625, "log_odds_ratio": -0.625244140625, "logits/chosen": 0.4179372787475586, "logits/rejected": 0.466888427734375, "logps/chosen": -0.449951171875, "logps/rejected": -0.5008544921875, "loss": 9.7656, "nll_loss": 0.4622802734375, "rewards/accuracies": 0.6875, "rewards/chosen": -0.04502105712890625, "rewards/margins": 0.0050506591796875, "rewards/rejected": -0.05007171630859375, "step": 388 }, { "epoch": 0.21000067480936635, "grad_norm": 0.20560340700194282, "learning_rate": 1.7984702026906408e-05, "log_odds_chosen": 0.08740234375, "log_odds_ratio": -0.6590576171875, "logits/chosen": 0.37177276611328125, "logits/rejected": 0.4385490417480469, "logps/chosen": -0.595703125, "logps/rejected": -0.61083984375, "loss": 9.6172, "nll_loss": 0.6075439453125, "rewards/accuracies": 0.625, "rewards/chosen": -0.059539794921875, "rewards/margins": 0.0015411376953125, "rewards/rejected": -0.0610809326171875, "step": 389 }, { "epoch": 0.21054052230244955, "grad_norm": 0.20036552351179848, "learning_rate": 1.797442810562721e-05, "log_odds_chosen": 0.112060546875, "log_odds_ratio": -0.6466064453125, "logits/chosen": 0.3292236328125, "logits/rejected": 0.490264892578125, "logps/chosen": -0.578857421875, "logps/rejected": -0.610107421875, "loss": 9.5581, "nll_loss": 0.59417724609375, "rewards/accuracies": 0.6875, "rewards/chosen": -0.05789947509765625, "rewards/margins": 0.0031280517578125, "rewards/rejected": -0.06102752685546875, "step": 390 }, { "epoch": 0.21108036979553277, "grad_norm": 0.20849383841941935, "learning_rate": 1.7964131013148494e-05, "log_odds_chosen": 0.10205078125, "log_odds_ratio": -0.653076171875, "logits/chosen": 0.17095947265625, "logits/rejected": 0.273162841796875, "logps/chosen": -0.461669921875, "logps/rejected": -0.4886474609375, "loss": 9.1323, "nll_loss": 0.47198486328125, "rewards/accuracies": 0.625, "rewards/chosen": -0.04615020751953125, "rewards/margins": 0.00266265869140625, "rewards/rejected": -0.0488128662109375, "step": 391 }, { "epoch": 0.21162021728861596, "grad_norm": 0.18166049415095387, "learning_rate": 1.79538107793904e-05, "log_odds_chosen": 0.1251220703125, "log_odds_ratio": -0.636474609375, "logits/chosen": 0.34665679931640625, "logits/rejected": 0.37616729736328125, "logps/chosen": -0.602294921875, "logps/rejected": -0.6480712890625, "loss": 9.7329, "nll_loss": 0.6043701171875, "rewards/accuracies": 0.75, "rewards/chosen": -0.06017303466796875, "rewards/margins": 0.0046539306640625, "rewards/rejected": -0.06482696533203125, "step": 392 }, { "epoch": 0.21216006478169916, "grad_norm": 0.18592603486560666, "learning_rate": 1.79434674343403e-05, "log_odds_chosen": 0.1630859375, "log_odds_ratio": -0.61767578125, "logits/chosen": 0.3270263671875, "logits/rejected": 0.36822509765625, "logps/chosen": -0.520263671875, "logps/rejected": -0.5885009765625, "loss": 9.5664, "nll_loss": 0.537109375, "rewards/accuracies": 0.875, "rewards/chosen": -0.05200958251953125, "rewards/margins": 0.00681304931640625, "rewards/rejected": -0.0588226318359375, "step": 393 }, { "epoch": 0.21269991227478238, "grad_norm": 0.19283474795515076, "learning_rate": 1.793310100805273e-05, "log_odds_chosen": -0.00634765625, "log_odds_ratio": -0.70068359375, "logits/chosen": 0.404998779296875, "logits/rejected": 0.562286376953125, "logps/chosen": -0.604248046875, "logps/rejected": -0.591064453125, "loss": 9.8452, "nll_loss": 0.6114501953125, "rewards/accuracies": 0.5625, "rewards/chosen": -0.06040191650390625, "rewards/margins": -0.0013275146484375, "rewards/rejected": -0.05907440185546875, "step": 394 }, { "epoch": 0.21323975976786558, "grad_norm": 0.20816849762924644, "learning_rate": 1.7922711530649296e-05, "log_odds_chosen": 0.135009765625, "log_odds_ratio": -0.6312255859375, "logits/chosen": 0.642791748046875, "logits/rejected": 0.72900390625, "logps/chosen": -0.53582763671875, "logps/rejected": -0.5823974609375, "loss": 9.3267, "nll_loss": 0.54327392578125, "rewards/accuracies": 0.75, "rewards/chosen": -0.05361175537109375, "rewards/margins": 0.00466156005859375, "rewards/rejected": -0.0582733154296875, "step": 395 }, { "epoch": 0.21377960726094877, "grad_norm": 0.2025658117245484, "learning_rate": 1.791229903231857e-05, "log_odds_chosen": 0.141357421875, "log_odds_ratio": -0.62939453125, "logits/chosen": 0.3679351806640625, "logits/rejected": 0.39464569091796875, "logps/chosen": -0.490966796875, "logps/rejected": -0.5458984375, "loss": 9.5215, "nll_loss": 0.501953125, "rewards/accuracies": 0.75, "rewards/chosen": -0.049072265625, "rewards/margins": 0.0055389404296875, "rewards/rejected": -0.0546112060546875, "step": 396 }, { "epoch": 0.214319454754032, "grad_norm": 0.19605804797033036, "learning_rate": 1.7901863543316027e-05, "log_odds_chosen": 0.109375, "log_odds_ratio": -0.6455078125, "logits/chosen": 0.8984375, "logits/rejected": 1.0120849609375, "logps/chosen": -0.552490234375, "logps/rejected": -0.589599609375, "loss": 9.1182, "nll_loss": 0.5614013671875, "rewards/accuracies": 0.5, "rewards/chosen": -0.05523681640625, "rewards/margins": 0.0037078857421875, "rewards/rejected": -0.0589447021484375, "step": 397 }, { "epoch": 0.2148593022471152, "grad_norm": 0.18572741594725478, "learning_rate": 1.789140509396394e-05, "log_odds_chosen": 0.149169921875, "log_odds_ratio": -0.6251220703125, "logits/chosen": 0.46461915969848633, "logits/rejected": 0.5454607009887695, "logps/chosen": -0.5494384765625, "logps/rejected": -0.6114501953125, "loss": 9.4116, "nll_loss": 0.5538330078125, "rewards/accuracies": 0.8125, "rewards/chosen": -0.054901123046875, "rewards/margins": 0.006256103515625, "rewards/rejected": -0.0611572265625, "step": 398 }, { "epoch": 0.2153991497401984, "grad_norm": 0.17166439536474234, "learning_rate": 1.7880923714651296e-05, "log_odds_chosen": 0.193115234375, "log_odds_ratio": -0.6075439453125, "logits/chosen": 0.44879150390625, "logits/rejected": 0.494140625, "logps/chosen": -0.468505859375, "logps/rejected": -0.53369140625, "loss": 8.8413, "nll_loss": 0.4898681640625, "rewards/accuracies": 0.8125, "rewards/chosen": -0.0468902587890625, "rewards/margins": 0.0064239501953125, "rewards/rejected": -0.053314208984375, "step": 399 }, { "epoch": 0.2159389972332816, "grad_norm": 0.21491208448141538, "learning_rate": 1.7870419435833715e-05, "log_odds_chosen": 0.1134033203125, "log_odds_ratio": -0.64453125, "logits/chosen": 0.46215057373046875, "logits/rejected": 0.53717041015625, "logps/chosen": -0.5538330078125, "logps/rejected": -0.6029052734375, "loss": 9.3989, "nll_loss": 0.5584716796875, "rewards/accuracies": 0.75, "rewards/chosen": -0.05538177490234375, "rewards/margins": 0.00495147705078125, "rewards/rejected": -0.060333251953125, "step": 400 }, { "epoch": 0.2164788447263648, "grad_norm": 0.18758937669034553, "learning_rate": 1.7859892288033353e-05, "log_odds_chosen": 0.1123046875, "log_odds_ratio": -0.644287109375, "logits/chosen": 0.517333984375, "logits/rejected": 0.60052490234375, "logps/chosen": -0.6033935546875, "logps/rejected": -0.6356201171875, "loss": 8.8965, "nll_loss": 0.6048583984375, "rewards/accuracies": 0.8125, "rewards/chosen": -0.060302734375, "rewards/margins": 0.00323486328125, "rewards/rejected": -0.06353759765625, "step": 401 }, { "epoch": 0.217018692219448, "grad_norm": 0.2069175379621725, "learning_rate": 1.784934230183882e-05, "log_odds_chosen": 0.1593017578125, "log_odds_ratio": -0.63037109375, "logits/chosen": 0.0736083984375, "logits/rejected": 0.15557289123535156, "logps/chosen": -0.53271484375, "logps/rejected": -0.5780029296875, "loss": 9.9141, "nll_loss": 0.535888671875, "rewards/accuracies": 0.6875, "rewards/chosen": -0.05323028564453125, "rewards/margins": 0.00452423095703125, "rewards/rejected": -0.0577545166015625, "step": 402 }, { "epoch": 0.21755853971253122, "grad_norm": 0.22016328500343865, "learning_rate": 1.7838769507905077e-05, "log_odds_chosen": 0.1068115234375, "log_odds_ratio": -0.648193359375, "logits/chosen": 0.473421573638916, "logits/rejected": 0.4524822235107422, "logps/chosen": -0.56982421875, "logps/rejected": -0.5975341796875, "loss": 9.2388, "nll_loss": 0.5826416015625, "rewards/accuracies": 0.8125, "rewards/chosen": -0.05701446533203125, "rewards/margins": 0.00270843505859375, "rewards/rejected": -0.059722900390625, "step": 403 }, { "epoch": 0.21809838720561442, "grad_norm": 0.22229389998871193, "learning_rate": 1.7828173936953368e-05, "log_odds_chosen": 0.18359375, "log_odds_ratio": -0.6075439453125, "logits/chosen": 0.0736236572265625, "logits/rejected": 0.08709716796875, "logps/chosen": -0.43463134765625, "logps/rejected": -0.495849609375, "loss": 9.948, "nll_loss": 0.44329833984375, "rewards/accuracies": 0.9375, "rewards/chosen": -0.0434417724609375, "rewards/margins": 0.0061492919921875, "rewards/rejected": -0.049591064453125, "step": 404 }, { "epoch": 0.21863823469869761, "grad_norm": 0.22161437548021676, "learning_rate": 1.7817555619771115e-05, "log_odds_chosen": 0.1285400390625, "log_odds_ratio": -0.64013671875, "logits/chosen": 0.33477783203125, "logits/rejected": 0.49216461181640625, "logps/chosen": -0.57159423828125, "logps/rejected": -0.611328125, "loss": 10.3745, "nll_loss": 0.5775146484375, "rewards/accuracies": 0.6875, "rewards/chosen": -0.05716705322265625, "rewards/margins": 0.00395965576171875, "rewards/rejected": -0.061126708984375, "step": 405 }, { "epoch": 0.2191780821917808, "grad_norm": 0.1924334774471778, "learning_rate": 1.780691458721183e-05, "log_odds_chosen": 0.2183837890625, "log_odds_ratio": -0.593017578125, "logits/chosen": 0.184967041015625, "logits/rejected": 0.21706771850585938, "logps/chosen": -0.439697265625, "logps/rejected": -0.5189208984375, "loss": 8.9692, "nll_loss": 0.4417724609375, "rewards/accuracies": 0.9375, "rewards/chosen": -0.04396820068359375, "rewards/margins": 0.00791168212890625, "rewards/rejected": -0.0518798828125, "step": 406 }, { "epoch": 0.21971792968486403, "grad_norm": 0.19371101074957603, "learning_rate": 1.779625087019504e-05, "log_odds_chosen": 0.0794677734375, "log_odds_ratio": -0.6602783203125, "logits/chosen": 0.6094970703125, "logits/rejected": 0.7154312133789062, "logps/chosen": -0.4964599609375, "logps/rejected": -0.529541015625, "loss": 9.3291, "nll_loss": 0.5166015625, "rewards/accuracies": 0.625, "rewards/chosen": -0.0496368408203125, "rewards/margins": 0.00334930419921875, "rewards/rejected": -0.05298614501953125, "step": 407 }, { "epoch": 0.22025777717794723, "grad_norm": 0.19329780535762633, "learning_rate": 1.778556449970618e-05, "log_odds_chosen": 0.0926513671875, "log_odds_ratio": -0.6566162109375, "logits/chosen": 0.210479736328125, "logits/rejected": 0.3552570343017578, "logps/chosen": -0.620361328125, "logps/rejected": -0.6302490234375, "loss": 8.7485, "nll_loss": 0.630615234375, "rewards/accuracies": 0.5625, "rewards/chosen": -0.06203460693359375, "rewards/margins": 0.00098419189453125, "rewards/rejected": -0.063018798828125, "step": 408 }, { "epoch": 0.22079762467103042, "grad_norm": 0.18924118730651956, "learning_rate": 1.7774855506796497e-05, "log_odds_chosen": 0.1688232421875, "log_odds_ratio": -0.6180419921875, "logits/chosen": 0.7030792236328125, "logits/rejected": 0.73126220703125, "logps/chosen": -0.5211181640625, "logps/rejected": -0.5816650390625, "loss": 9.5249, "nll_loss": 0.524658203125, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0521087646484375, "rewards/margins": 0.0060272216796875, "rewards/rejected": -0.058135986328125, "step": 409 }, { "epoch": 0.22133747216411365, "grad_norm": 0.18370680366736822, "learning_rate": 1.7764123922582988e-05, "log_odds_chosen": 0.225341796875, "log_odds_ratio": -0.5909423828125, "logits/chosen": 0.27243804931640625, "logits/rejected": 0.34105682373046875, "logps/chosen": -0.41314697265625, "logps/rejected": -0.4833984375, "loss": 9.4165, "nll_loss": 0.42816162109375, "rewards/accuracies": 0.875, "rewards/chosen": -0.04131317138671875, "rewards/margins": 0.00702667236328125, "rewards/rejected": -0.04833984375, "step": 410 }, { "epoch": 0.22187731965719684, "grad_norm": 0.2098915139412721, "learning_rate": 1.7753369778248292e-05, "log_odds_chosen": 0.1630859375, "log_odds_ratio": -0.6197509765625, "logits/chosen": 0.33155059814453125, "logits/rejected": 0.3855743408203125, "logps/chosen": -0.55010986328125, "logps/rejected": -0.5970458984375, "loss": 9.1257, "nll_loss": 0.55767822265625, "rewards/accuracies": 0.8125, "rewards/chosen": -0.05504608154296875, "rewards/margins": 0.00466156005859375, "rewards/rejected": -0.0597076416015625, "step": 411 }, { "epoch": 0.22241716715028004, "grad_norm": 0.20269790948743738, "learning_rate": 1.774259310504059e-05, "log_odds_chosen": 0.08563232421875, "log_odds_ratio": -0.6546630859375, "logits/chosen": 0.514373779296875, "logits/rejected": 0.5004196166992188, "logps/chosen": -0.5599365234375, "logps/rejected": -0.5831298828125, "loss": 10.291, "nll_loss": 0.57257080078125, "rewards/accuracies": 0.6875, "rewards/chosen": -0.05600738525390625, "rewards/margins": 0.0023345947265625, "rewards/rejected": -0.05834197998046875, "step": 412 }, { "epoch": 0.22295701464336326, "grad_norm": 0.1827066729546108, "learning_rate": 1.7731793934273532e-05, "log_odds_chosen": 0.124267578125, "log_odds_ratio": -0.63671875, "logits/chosen": 0.29616546630859375, "logits/rejected": 0.383819580078125, "logps/chosen": -0.4393310546875, "logps/rejected": -0.472900390625, "loss": 9.314, "nll_loss": 0.44342041015625, "rewards/accuracies": 0.75, "rewards/chosen": -0.04395294189453125, "rewards/margins": 0.00333404541015625, "rewards/rejected": -0.0472869873046875, "step": 413 }, { "epoch": 0.22349686213644646, "grad_norm": 0.2087198220602066, "learning_rate": 1.7720972297326144e-05, "log_odds_chosen": 0.0859375, "log_odds_ratio": -0.6568603515625, "logits/chosen": 0.282318115234375, "logits/rejected": 0.3927764892578125, "logps/chosen": -0.5447998046875, "logps/rejected": -0.5758056640625, "loss": 9.6553, "nll_loss": 0.54345703125, "rewards/accuracies": 0.625, "rewards/chosen": -0.054473876953125, "rewards/margins": 0.003082275390625, "rewards/rejected": -0.05755615234375, "step": 414 }, { "epoch": 0.22403670962952965, "grad_norm": 0.2099195414927431, "learning_rate": 1.771012822564272e-05, "log_odds_chosen": 0.103759765625, "log_odds_ratio": -0.6485595703125, "logits/chosen": 0.3404541015625, "logits/rejected": 0.43744325637817383, "logps/chosen": -0.519287109375, "logps/rejected": -0.5521240234375, "loss": 9.7678, "nll_loss": 0.5343017578125, "rewards/accuracies": 0.6875, "rewards/chosen": -0.05190277099609375, "rewards/margins": 0.00325775146484375, "rewards/rejected": -0.0551605224609375, "step": 415 }, { "epoch": 0.22457655712261287, "grad_norm": 0.18604375407494445, "learning_rate": 1.7699261750732753e-05, "log_odds_chosen": 0.1480712890625, "log_odds_ratio": -0.6268310546875, "logits/chosen": 0.069305419921875, "logits/rejected": 0.1444854736328125, "logps/chosen": -0.43646240234375, "logps/rejected": -0.4825439453125, "loss": 8.7729, "nll_loss": 0.45379638671875, "rewards/accuracies": 0.75, "rewards/chosen": -0.04364776611328125, "rewards/margins": 0.00461578369140625, "rewards/rejected": -0.0482635498046875, "step": 416 }, { "epoch": 0.22511640461569607, "grad_norm": 0.181100692347699, "learning_rate": 1.7688372904170824e-05, "log_odds_chosen": 0.0379638671875, "log_odds_ratio": -0.6807861328125, "logits/chosen": 0.377655029296875, "logits/rejected": 0.5057182312011719, "logps/chosen": -0.48626708984375, "logps/rejected": -0.490966796875, "loss": 9.395, "nll_loss": 0.5015869140625, "rewards/accuracies": 0.4375, "rewards/chosen": -0.048618316650390625, "rewards/margins": 0.00048828125, "rewards/rejected": -0.04911041259765625, "step": 417 }, { "epoch": 0.22565625210877926, "grad_norm": 0.18051279168097734, "learning_rate": 1.7677461717596526e-05, "log_odds_chosen": -0.0003662109375, "log_odds_ratio": -0.69384765625, "logits/chosen": 0.811492919921875, "logits/rejected": 0.9306640625, "logps/chosen": -0.6295166015625, "logps/rejected": -0.6270751953125, "loss": 9.832, "nll_loss": 0.646484375, "rewards/accuracies": 0.5, "rewards/chosen": -0.06298828125, "rewards/margins": -0.0002593994140625, "rewards/rejected": -0.0627288818359375, "step": 418 }, { "epoch": 0.22619609960186246, "grad_norm": 0.2269264282476391, "learning_rate": 1.766652822271436e-05, "log_odds_chosen": -0.005126953125, "log_odds_ratio": -0.698486328125, "logits/chosen": 0.530975341796875, "logits/rejected": 0.58636474609375, "logps/chosen": -0.5452880859375, "logps/rejected": -0.5391845703125, "loss": 10.4961, "nll_loss": 0.552490234375, "rewards/accuracies": 0.5, "rewards/chosen": -0.054534912109375, "rewards/margins": -0.0006103515625, "rewards/rejected": -0.053924560546875, "step": 419 }, { "epoch": 0.22673594709494568, "grad_norm": 0.19714113143180528, "learning_rate": 1.7655572451293653e-05, "log_odds_chosen": 0.0235595703125, "log_odds_ratio": -0.6876220703125, "logits/chosen": 0.05556488037109375, "logits/rejected": 0.2556915283203125, "logps/chosen": -0.5765380859375, "logps/rejected": -0.565185546875, "loss": 10.2554, "nll_loss": 0.5888671875, "rewards/accuracies": 0.625, "rewards/chosen": -0.05765533447265625, "rewards/margins": -0.00115203857421875, "rewards/rejected": -0.0565032958984375, "step": 420 }, { "epoch": 0.22727579458802888, "grad_norm": 0.19524951859439443, "learning_rate": 1.7644594435168453e-05, "log_odds_chosen": 0.054931640625, "log_odds_ratio": -0.666748046875, "logits/chosen": 0.41956520080566406, "logits/rejected": 0.496734619140625, "logps/chosen": -0.4591064453125, "logps/rejected": -0.4761962890625, "loss": 9.4312, "nll_loss": 0.4691162109375, "rewards/accuracies": 0.625, "rewards/chosen": -0.0459136962890625, "rewards/margins": 0.001708984375, "rewards/rejected": -0.0476226806640625, "step": 421 }, { "epoch": 0.22781564208111207, "grad_norm": 0.18801530518613951, "learning_rate": 1.763359420623745e-05, "log_odds_chosen": 0.0845947265625, "log_odds_ratio": -0.653076171875, "logits/chosen": 0.2504730224609375, "logits/rejected": 0.3396759033203125, "logps/chosen": -0.3988037109375, "logps/rejected": -0.42578125, "loss": 9.2456, "nll_loss": 0.404052734375, "rewards/accuracies": 0.75, "rewards/chosen": -0.03989410400390625, "rewards/margins": 0.00267791748046875, "rewards/rejected": -0.042572021484375, "step": 422 }, { "epoch": 0.2283554895741953, "grad_norm": 0.20281550475051524, "learning_rate": 1.7622571796463878e-05, "log_odds_chosen": 0.01708984375, "log_odds_ratio": -0.6875, "logits/chosen": 0.7734527587890625, "logits/rejected": 0.786285400390625, "logps/chosen": -0.5859375, "logps/rejected": -0.575439453125, "loss": 10.2148, "nll_loss": 0.60272216796875, "rewards/accuracies": 0.5, "rewards/chosen": -0.058563232421875, "rewards/margins": -0.00103759765625, "rewards/rejected": -0.057525634765625, "step": 423 }, { "epoch": 0.2288953370672785, "grad_norm": 0.1802881328550753, "learning_rate": 1.761152723787542e-05, "log_odds_chosen": 0.0533447265625, "log_odds_ratio": -0.669921875, "logits/chosen": 0.40756988525390625, "logits/rejected": 0.3980712890625, "logps/chosen": -0.5406494140625, "logps/rejected": -0.5440673828125, "loss": 8.8579, "nll_loss": 0.54150390625, "rewards/accuracies": 0.625, "rewards/chosen": -0.05405426025390625, "rewards/margins": 0.00038909912109375, "rewards/rejected": -0.054443359375, "step": 424 }, { "epoch": 0.2294351845603617, "grad_norm": 0.19119063856309026, "learning_rate": 1.760046056256412e-05, "log_odds_chosen": 0.03082275390625, "log_odds_ratio": -0.679931640625, "logits/chosen": 0.32177734375, "logits/rejected": 0.4844818115234375, "logps/chosen": -0.48980712890625, "logps/rejected": -0.4927978515625, "loss": 9.2017, "nll_loss": 0.4984130859375, "rewards/accuracies": 0.4375, "rewards/chosen": -0.0489654541015625, "rewards/margins": 0.00032806396484375, "rewards/rejected": -0.04929351806640625, "step": 425 }, { "epoch": 0.2299750320534449, "grad_norm": 0.1861793664798319, "learning_rate": 1.758937180268628e-05, "log_odds_chosen": 0.02056884765625, "log_odds_ratio": -0.688232421875, "logits/chosen": 0.12348556518554688, "logits/rejected": 0.2476806640625, "logps/chosen": -0.49627685546875, "logps/rejected": -0.4903564453125, "loss": 8.6787, "nll_loss": 0.4974365234375, "rewards/accuracies": 0.5, "rewards/chosen": -0.04962921142578125, "rewards/margins": -0.00058746337890625, "rewards/rejected": -0.049041748046875, "step": 426 }, { "epoch": 0.2305148795465281, "grad_norm": 0.19446685274282122, "learning_rate": 1.7578260990462372e-05, "log_odds_chosen": 0.065185546875, "log_odds_ratio": -0.6663818359375, "logits/chosen": 0.441497802734375, "logits/rejected": 0.516876220703125, "logps/chosen": -0.50146484375, "logps/rejected": -0.520263671875, "loss": 9.0205, "nll_loss": 0.5052490234375, "rewards/accuracies": 0.4375, "rewards/chosen": -0.0501556396484375, "rewards/margins": 0.0018310546875, "rewards/rejected": -0.0519866943359375, "step": 427 }, { "epoch": 0.2310547270396113, "grad_norm": 0.21938201913374858, "learning_rate": 1.7567128158176955e-05, "log_odds_chosen": -0.01171875, "log_odds_ratio": -0.701171875, "logits/chosen": 0.522216796875, "logits/rejected": 0.595458984375, "logps/chosen": -0.5740966796875, "logps/rejected": -0.56280517578125, "loss": 11.4912, "nll_loss": 0.57281494140625, "rewards/accuracies": 0.5, "rewards/chosen": -0.05739593505859375, "rewards/margins": -0.00110626220703125, "rewards/rejected": -0.0562896728515625, "step": 428 }, { "epoch": 0.23159457453269452, "grad_norm": 0.1882856233014425, "learning_rate": 1.755597333817856e-05, "log_odds_chosen": 0.056640625, "log_odds_ratio": -0.66650390625, "logits/chosen": 0.312164306640625, "logits/rejected": 0.38930177688598633, "logps/chosen": -0.486572265625, "logps/rejected": -0.5029296875, "loss": 9.0034, "nll_loss": 0.500244140625, "rewards/accuracies": 0.6875, "rewards/chosen": -0.04868316650390625, "rewards/margins": 0.00157928466796875, "rewards/rejected": -0.050262451171875, "step": 429 }, { "epoch": 0.23213442202577772, "grad_norm": 0.19582079466858832, "learning_rate": 1.754479656287962e-05, "log_odds_chosen": 0.0860595703125, "log_odds_ratio": -0.654296875, "logits/chosen": 0.3464794158935547, "logits/rejected": 0.3992462158203125, "logps/chosen": -0.5477294921875, "logps/rejected": -0.564208984375, "loss": 9.2788, "nll_loss": 0.5601806640625, "rewards/accuracies": 0.75, "rewards/chosen": -0.05474090576171875, "rewards/margins": 0.00164031982421875, "rewards/rejected": -0.0563812255859375, "step": 430 }, { "epoch": 0.23267426951886092, "grad_norm": 0.1950211274814484, "learning_rate": 1.7533597864756345e-05, "log_odds_chosen": 0.0660400390625, "log_odds_ratio": -0.66259765625, "logits/chosen": 0.25677490234375, "logits/rejected": 0.3721122741699219, "logps/chosen": -0.5054931640625, "logps/rejected": -0.527099609375, "loss": 10.3774, "nll_loss": 0.5208740234375, "rewards/accuracies": 0.6875, "rewards/chosen": -0.050537109375, "rewards/margins": 0.0021820068359375, "rewards/rejected": -0.0527191162109375, "step": 431 }, { "epoch": 0.23321411701194414, "grad_norm": 0.1954097934471334, "learning_rate": 1.752237727634867e-05, "log_odds_chosen": 0.013671875, "log_odds_ratio": -0.68994140625, "logits/chosen": 0.35888671875, "logits/rejected": 0.4437255859375, "logps/chosen": -0.549560546875, "logps/rejected": -0.543212890625, "loss": 9.5576, "nll_loss": 0.55206298828125, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0549468994140625, "rewards/margins": -0.00060272216796875, "rewards/rejected": -0.05434417724609375, "step": 432 }, { "epoch": 0.23375396450502733, "grad_norm": 0.1986493651843359, "learning_rate": 1.7511134830260115e-05, "log_odds_chosen": 0.0916748046875, "log_odds_ratio": -0.6566162109375, "logits/chosen": 0.0705413818359375, "logits/rejected": 0.148834228515625, "logps/chosen": -0.5535888671875, "logps/rejected": -0.573974609375, "loss": 9.1812, "nll_loss": 0.561767578125, "rewards/accuracies": 0.625, "rewards/chosen": -0.055328369140625, "rewards/margins": 0.002044677734375, "rewards/rejected": -0.057373046875, "step": 433 }, { "epoch": 0.23429381199811053, "grad_norm": 0.2204596785373209, "learning_rate": 1.749987055915772e-05, "log_odds_chosen": 0.075927734375, "log_odds_ratio": -0.6591796875, "logits/chosen": 0.356170654296875, "logits/rejected": 0.4336204528808594, "logps/chosen": -0.492919921875, "logps/rejected": -0.513916015625, "loss": 9.9785, "nll_loss": 0.5150146484375, "rewards/accuracies": 0.5, "rewards/chosen": -0.0493011474609375, "rewards/margins": 0.0020751953125, "rewards/rejected": -0.0513763427734375, "step": 434 }, { "epoch": 0.23483365949119372, "grad_norm": 0.19652480597482222, "learning_rate": 1.748858449577195e-05, "log_odds_chosen": 0.0859375, "log_odds_ratio": -0.6573486328125, "logits/chosen": 0.1187744140625, "logits/rejected": 0.20758056640625, "logps/chosen": -0.48046875, "logps/rejected": -0.496826171875, "loss": 10.0535, "nll_loss": 0.48822021484375, "rewards/accuracies": 0.625, "rewards/chosen": -0.04804229736328125, "rewards/margins": 0.00167083740234375, "rewards/rejected": -0.049713134765625, "step": 435 }, { "epoch": 0.23537350698427695, "grad_norm": 0.21389740695374004, "learning_rate": 1.7477276672896574e-05, "log_odds_chosen": 0.14178466796875, "log_odds_ratio": -0.631103515625, "logits/chosen": 0.14566612243652344, "logits/rejected": 0.28110504150390625, "logps/chosen": -0.5880126953125, "logps/rejected": -0.62109375, "loss": 9.5537, "nll_loss": 0.61065673828125, "rewards/accuracies": 0.6875, "rewards/chosen": -0.05879974365234375, "rewards/margins": 0.00331878662109375, "rewards/rejected": -0.0621185302734375, "step": 436 }, { "epoch": 0.23591335447736014, "grad_norm": 0.1871330071310779, "learning_rate": 1.7465947123388597e-05, "log_odds_chosen": 0.08160400390625, "log_odds_ratio": -0.65576171875, "logits/chosen": 0.619476318359375, "logits/rejected": 0.566375732421875, "logps/chosen": -0.57672119140625, "logps/rejected": -0.6175537109375, "loss": 9.4185, "nll_loss": 0.57891845703125, "rewards/accuracies": 0.6875, "rewards/chosen": -0.05770111083984375, "rewards/margins": 0.0040740966796875, "rewards/rejected": -0.06177520751953125, "step": 437 }, { "epoch": 0.23645320197044334, "grad_norm": 0.18043063307382695, "learning_rate": 1.7454595880168158e-05, "log_odds_chosen": 0.0782470703125, "log_odds_ratio": -0.6578369140625, "logits/chosen": 0.35662078857421875, "logits/rejected": 0.40350341796875, "logps/chosen": -0.448974609375, "logps/rejected": -0.4754638671875, "loss": 9.0483, "nll_loss": 0.4576416015625, "rewards/accuracies": 0.625, "rewards/chosen": -0.04489898681640625, "rewards/margins": 0.00266265869140625, "rewards/rejected": -0.0475616455078125, "step": 438 }, { "epoch": 0.23699304946352656, "grad_norm": 0.1863118432663848, "learning_rate": 1.7443222976218424e-05, "log_odds_chosen": 0.118896484375, "log_odds_ratio": -0.63720703125, "logits/chosen": 0.4410400390625, "logits/rejected": 0.5144500732421875, "logps/chosen": -0.48681640625, "logps/rejected": -0.528076171875, "loss": 9.2256, "nll_loss": 0.49395751953125, "rewards/accuracies": 0.8125, "rewards/chosen": -0.0486602783203125, "rewards/margins": 0.0041351318359375, "rewards/rejected": -0.05279541015625, "step": 439 }, { "epoch": 0.23753289695660976, "grad_norm": 0.22180938276997433, "learning_rate": 1.7431828444585508e-05, "log_odds_chosen": 0.02978515625, "log_odds_ratio": -0.68017578125, "logits/chosen": 0.34845733642578125, "logits/rejected": 0.3853607177734375, "logps/chosen": -0.494873046875, "logps/rejected": -0.50244140625, "loss": 10.1948, "nll_loss": 0.50048828125, "rewards/accuracies": 0.375, "rewards/chosen": -0.0495147705078125, "rewards/margins": 0.000762939453125, "rewards/rejected": -0.0502777099609375, "step": 440 }, { "epoch": 0.23807274444969295, "grad_norm": 0.1699171761105973, "learning_rate": 1.7420412318378363e-05, "log_odds_chosen": 0.1190185546875, "log_odds_ratio": -0.6400146484375, "logits/chosen": 0.24187088012695312, "logits/rejected": 0.337738037109375, "logps/chosen": -0.462158203125, "logps/rejected": -0.4947509765625, "loss": 8.3701, "nll_loss": 0.4700927734375, "rewards/accuracies": 0.6875, "rewards/chosen": -0.04621124267578125, "rewards/margins": 0.00327301025390625, "rewards/rejected": -0.0494842529296875, "step": 441 }, { "epoch": 0.23861259194277618, "grad_norm": 0.1923814792775152, "learning_rate": 1.740897463076869e-05, "log_odds_chosen": 0.061767578125, "log_odds_ratio": -0.66455078125, "logits/chosen": 0.394775390625, "logits/rejected": 0.507598876953125, "logps/chosen": -0.4664306640625, "logps/rejected": -0.4844970703125, "loss": 9.3696, "nll_loss": 0.4798583984375, "rewards/accuracies": 0.625, "rewards/chosen": -0.046600341796875, "rewards/margins": 0.00186920166015625, "rewards/rejected": -0.04846954345703125, "step": 442 }, { "epoch": 0.23915243943585937, "grad_norm": 0.21048721689591432, "learning_rate": 1.7397515414990835e-05, "log_odds_chosen": 0.151611328125, "log_odds_ratio": -0.6220703125, "logits/chosen": 0.0903778076171875, "logits/rejected": 0.137542724609375, "logps/chosen": -0.3941650390625, "logps/rejected": -0.4427490234375, "loss": 9.8066, "nll_loss": 0.402099609375, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0394134521484375, "rewards/margins": 0.00485992431640625, "rewards/rejected": -0.04427337646484375, "step": 443 }, { "epoch": 0.23969228692894257, "grad_norm": 0.18670785529844036, "learning_rate": 1.7386034704341705e-05, "log_odds_chosen": 0.04443359375, "log_odds_ratio": -0.673095703125, "logits/chosen": 0.3885040283203125, "logits/rejected": 0.421661376953125, "logps/chosen": -0.486572265625, "logps/rejected": -0.4990234375, "loss": 8.8794, "nll_loss": 0.498291015625, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0486297607421875, "rewards/margins": 0.00128936767578125, "rewards/rejected": -0.04991912841796875, "step": 444 }, { "epoch": 0.2402321344220258, "grad_norm": 0.2037353544579713, "learning_rate": 1.7374532532180665e-05, "log_odds_chosen": -0.0013427734375, "log_odds_ratio": -0.69580078125, "logits/chosen": 0.5746002197265625, "logits/rejected": 0.747584342956543, "logps/chosen": -0.5885009765625, "logps/rejected": -0.58642578125, "loss": 10.0425, "nll_loss": 0.5948486328125, "rewards/accuracies": 0.5, "rewards/chosen": -0.05886077880859375, "rewards/margins": -0.00022125244140625, "rewards/rejected": -0.0586395263671875, "step": 445 }, { "epoch": 0.24077198191510898, "grad_norm": 0.2072033830689558, "learning_rate": 1.7363008931929438e-05, "log_odds_chosen": 0.089599609375, "log_odds_ratio": -0.651611328125, "logits/chosen": 0.4948558807373047, "logits/rejected": 0.5635392665863037, "logps/chosen": -0.51123046875, "logps/rejected": -0.5421142578125, "loss": 9.7515, "nll_loss": 0.52490234375, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0511322021484375, "rewards/margins": 0.00307464599609375, "rewards/rejected": -0.05420684814453125, "step": 446 }, { "epoch": 0.24131182940819218, "grad_norm": 0.18955698564924256, "learning_rate": 1.7351463937072008e-05, "log_odds_chosen": 0.100341796875, "log_odds_ratio": -0.645751953125, "logits/chosen": 0.3415374755859375, "logits/rejected": 0.3901519775390625, "logps/chosen": -0.418701171875, "logps/rejected": -0.4482421875, "loss": 9.3027, "nll_loss": 0.422119140625, "rewards/accuracies": 0.6875, "rewards/chosen": -0.041900634765625, "rewards/margins": 0.002899169921875, "rewards/rejected": -0.0447998046875, "step": 447 }, { "epoch": 0.2418516769012754, "grad_norm": 0.16528255722523316, "learning_rate": 1.7339897581154524e-05, "log_odds_chosen": 0.0460205078125, "log_odds_ratio": -0.675048828125, "logits/chosen": 0.40570068359375, "logits/rejected": 0.4785308837890625, "logps/chosen": -0.4439697265625, "logps/rejected": -0.45745849609375, "loss": 8.7065, "nll_loss": 0.454833984375, "rewards/accuracies": 0.625, "rewards/chosen": -0.0443572998046875, "rewards/margins": 0.00142669677734375, "rewards/rejected": -0.04578399658203125, "step": 448 }, { "epoch": 0.2423915243943586, "grad_norm": 0.18060522921188055, "learning_rate": 1.7328309897785213e-05, "log_odds_chosen": 0.09454345703125, "log_odds_ratio": -0.649169921875, "logits/chosen": 0.111541748046875, "logits/rejected": 0.1734619140625, "logps/chosen": -0.41064453125, "logps/rejected": -0.44268798828125, "loss": 8.7275, "nll_loss": 0.410888671875, "rewards/accuracies": 0.6875, "rewards/chosen": -0.04106903076171875, "rewards/margins": 0.00319671630859375, "rewards/rejected": -0.0442657470703125, "step": 449 }, { "epoch": 0.2429313718874418, "grad_norm": 0.1967002283092249, "learning_rate": 1.7316700920634262e-05, "log_odds_chosen": -0.0081787109375, "log_odds_ratio": -0.6982421875, "logits/chosen": 0.550750732421875, "logits/rejected": 0.6371917724609375, "logps/chosen": -0.537841796875, "logps/rejected": -0.5306396484375, "loss": 9.0244, "nll_loss": 0.552001953125, "rewards/accuracies": 0.375, "rewards/chosen": -0.053802490234375, "rewards/margins": -0.000701904296875, "rewards/rejected": -0.0531005859375, "step": 450 }, { "epoch": 0.243471219380525, "grad_norm": 0.2114705664854613, "learning_rate": 1.730507068343374e-05, "log_odds_chosen": 0.02783203125, "log_odds_ratio": -0.6842041015625, "logits/chosen": 0.3989216089248657, "logits/rejected": 0.5430583953857422, "logps/chosen": -0.6190185546875, "logps/rejected": -0.616455078125, "loss": 10.3105, "nll_loss": 0.619873046875, "rewards/accuracies": 0.5, "rewards/chosen": -0.0618896484375, "rewards/margins": -0.0002593994140625, "rewards/rejected": -0.0616302490234375, "step": 451 }, { "epoch": 0.2440110668736082, "grad_norm": 0.20264889864915256, "learning_rate": 1.7293419219977487e-05, "log_odds_chosen": 0.099853515625, "log_odds_ratio": -0.6492919921875, "logits/chosen": 0.15606689453125, "logits/rejected": 0.23213768005371094, "logps/chosen": -0.43548583984375, "logps/rejected": -0.46563720703125, "loss": 8.5645, "nll_loss": 0.443603515625, "rewards/accuracies": 0.75, "rewards/chosen": -0.0435638427734375, "rewards/margins": 0.0030059814453125, "rewards/rejected": -0.04656982421875, "step": 452 }, { "epoch": 0.2445509143666914, "grad_norm": 0.1861462244831662, "learning_rate": 1.7281746564121012e-05, "log_odds_chosen": 0.1251220703125, "log_odds_ratio": -0.634765625, "logits/chosen": 0.22344970703125, "logits/rejected": 0.3062744140625, "logps/chosen": -0.4578857421875, "logps/rejected": -0.49560546875, "loss": 8.874, "nll_loss": 0.4703369140625, "rewards/accuracies": 0.8125, "rewards/chosen": -0.0457916259765625, "rewards/margins": 0.0037689208984375, "rewards/rejected": -0.049560546875, "step": 453 }, { "epoch": 0.2450907618597746, "grad_norm": 0.19291443228138633, "learning_rate": 1.7270052749781423e-05, "log_odds_chosen": 0.0643310546875, "log_odds_ratio": -0.6630859375, "logits/chosen": 0.31339263916015625, "logits/rejected": 0.4044008255004883, "logps/chosen": -0.465087890625, "logps/rejected": -0.4864501953125, "loss": 9.0854, "nll_loss": 0.4837646484375, "rewards/accuracies": 0.8125, "rewards/chosen": -0.04647064208984375, "rewards/margins": 0.00214385986328125, "rewards/rejected": -0.048614501953125, "step": 454 }, { "epoch": 0.24563060935285783, "grad_norm": 0.1795860198627653, "learning_rate": 1.7258337810937284e-05, "log_odds_chosen": 0.0941162109375, "log_odds_ratio": -0.653076171875, "logits/chosen": 0.17108154296875, "logits/rejected": 0.193206787109375, "logps/chosen": -0.42767333984375, "logps/rejected": -0.458251953125, "loss": 9.0874, "nll_loss": 0.43292236328125, "rewards/accuracies": 0.75, "rewards/chosen": -0.0427398681640625, "rewards/margins": 0.00305938720703125, "rewards/rejected": -0.04579925537109375, "step": 455 }, { "epoch": 0.24617045684594102, "grad_norm": 0.20420307169971885, "learning_rate": 1.7246601781628557e-05, "log_odds_chosen": 0.0892333984375, "log_odds_ratio": -0.651123046875, "logits/chosen": 0.2935514450073242, "logits/rejected": 0.3281707763671875, "logps/chosen": -0.4351806640625, "logps/rejected": -0.46484375, "loss": 9.3613, "nll_loss": 0.4400634765625, "rewards/accuracies": 0.6875, "rewards/chosen": -0.04351043701171875, "rewards/margins": 0.00295257568359375, "rewards/rejected": -0.0464630126953125, "step": 456 }, { "epoch": 0.24671030433902422, "grad_norm": 0.19304296002905838, "learning_rate": 1.723484469595648e-05, "log_odds_chosen": 0.09820556640625, "log_odds_ratio": -0.64794921875, "logits/chosen": 0.274810791015625, "logits/rejected": 0.338165283203125, "logps/chosen": -0.4249267578125, "logps/rejected": -0.45989990234375, "loss": 8.5361, "nll_loss": 0.4300537109375, "rewards/accuracies": 0.75, "rewards/chosen": -0.042510986328125, "rewards/margins": 0.00347900390625, "rewards/rejected": -0.045989990234375, "step": 457 }, { "epoch": 0.24725015183210744, "grad_norm": 0.2052672322297934, "learning_rate": 1.722306658808347e-05, "log_odds_chosen": 0.069091796875, "log_odds_ratio": -0.663330078125, "logits/chosen": 0.13165283203125, "logits/rejected": 0.2506752014160156, "logps/chosen": -0.55963134765625, "logps/rejected": -0.57275390625, "loss": 10.0386, "nll_loss": 0.573974609375, "rewards/accuracies": 0.6875, "rewards/chosen": -0.05588531494140625, "rewards/margins": 0.001373291015625, "rewards/rejected": -0.05725860595703125, "step": 458 }, { "epoch": 0.24778999932519064, "grad_norm": 0.19287338125502632, "learning_rate": 1.721126749223304e-05, "log_odds_chosen": 0.1053466796875, "log_odds_ratio": -0.6456298828125, "logits/chosen": 0.389251708984375, "logits/rejected": 0.4638671875, "logps/chosen": -0.50445556640625, "logps/rejected": -0.5419921875, "loss": 9.3936, "nll_loss": 0.548583984375, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0504608154296875, "rewards/margins": 0.003753662109375, "rewards/rejected": -0.0542144775390625, "step": 459 }, { "epoch": 0.24832984681827383, "grad_norm": 0.20236690376410252, "learning_rate": 1.719944744268968e-05, "log_odds_chosen": 0.158447265625, "log_odds_ratio": -0.61962890625, "logits/chosen": 0.4940948486328125, "logits/rejected": 0.5307998657226562, "logps/chosen": -0.45733642578125, "logps/rejected": -0.50634765625, "loss": 8.9727, "nll_loss": 0.4652099609375, "rewards/accuracies": 0.8125, "rewards/chosen": -0.04570770263671875, "rewards/margins": 0.00490570068359375, "rewards/rejected": -0.0506134033203125, "step": 460 }, { "epoch": 0.24886969431135705, "grad_norm": 0.18244205464943086, "learning_rate": 1.7187606473798756e-05, "log_odds_chosen": 0.081787109375, "log_odds_ratio": -0.655517578125, "logits/chosen": 0.39855194091796875, "logits/rejected": 0.43976783752441406, "logps/chosen": -0.5445556640625, "logps/rejected": -0.56396484375, "loss": 9.4219, "nll_loss": 0.55224609375, "rewards/accuracies": 0.6875, "rewards/chosen": -0.054473876953125, "rewards/margins": 0.0019378662109375, "rewards/rejected": -0.0564117431640625, "step": 461 }, { "epoch": 0.24940954180444025, "grad_norm": 0.214027304093284, "learning_rate": 1.7175744619966442e-05, "log_odds_chosen": 0.2568359375, "log_odds_ratio": -0.5770263671875, "logits/chosen": 0.06438446044921875, "logits/rejected": 0.0712432861328125, "logps/chosen": -0.39263916015625, "logps/rejected": -0.4708251953125, "loss": 9.1689, "nll_loss": 0.4068603515625, "rewards/accuracies": 0.9375, "rewards/chosen": -0.03925323486328125, "rewards/margins": 0.00782012939453125, "rewards/rejected": -0.0470733642578125, "step": 462 }, { "epoch": 0.24994938929752344, "grad_norm": 0.20832754836035242, "learning_rate": 1.7163861915659574e-05, "log_odds_chosen": 0.10052490234375, "log_odds_ratio": -0.653076171875, "logits/chosen": 0.07468795776367188, "logits/rejected": 0.1709442138671875, "logps/chosen": -0.53924560546875, "logps/rejected": -0.55108642578125, "loss": 9.5459, "nll_loss": 0.54095458984375, "rewards/accuracies": 0.6875, "rewards/chosen": -0.05393218994140625, "rewards/margins": 0.00116729736328125, "rewards/rejected": -0.0550994873046875, "step": 463 }, { "epoch": 0.25048923679060664, "grad_norm": 0.19563328127262847, "learning_rate": 1.7151958395405584e-05, "log_odds_chosen": 0.164794921875, "log_odds_ratio": -0.61865234375, "logits/chosen": 0.0257568359375, "logits/rejected": 0.14662933349609375, "logps/chosen": -0.3782958984375, "logps/rejected": -0.429443359375, "loss": 8.7896, "nll_loss": 0.3790283203125, "rewards/accuracies": 0.875, "rewards/chosen": -0.03781890869140625, "rewards/margins": 0.00511932373046875, "rewards/rejected": -0.042938232421875, "step": 464 }, { "epoch": 0.25102908428368986, "grad_norm": 0.19778033113966537, "learning_rate": 1.7140034093792394e-05, "log_odds_chosen": 0.1103515625, "log_odds_ratio": -0.6455078125, "logits/chosen": 0.3477020263671875, "logits/rejected": 0.3780689239501953, "logps/chosen": -0.45782470703125, "logps/rejected": -0.49163818359375, "loss": 9.2739, "nll_loss": 0.474365234375, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0457916259765625, "rewards/margins": 0.00337982177734375, "rewards/rejected": -0.04917144775390625, "step": 465 }, { "epoch": 0.2515689317767731, "grad_norm": 0.19156684503125967, "learning_rate": 1.7128089045468294e-05, "log_odds_chosen": 0.160888671875, "log_odds_ratio": -0.61865234375, "logits/chosen": 0.013397216796875, "logits/rejected": 0.1010589599609375, "logps/chosen": -0.431884765625, "logps/rejected": -0.4818115234375, "loss": 8.6162, "nll_loss": 0.4384765625, "rewards/accuracies": 0.75, "rewards/chosen": -0.0431976318359375, "rewards/margins": 0.0049896240234375, "rewards/rejected": -0.048187255859375, "step": 466 }, { "epoch": 0.25210877926985625, "grad_norm": 0.20181917693179685, "learning_rate": 1.711612328514187e-05, "log_odds_chosen": 0.04931640625, "log_odds_ratio": -0.67236328125, "logits/chosen": 0.3764514923095703, "logits/rejected": 0.466522216796875, "logps/chosen": -0.59716796875, "logps/rejected": -0.60107421875, "loss": 9.2207, "nll_loss": 0.5975341796875, "rewards/accuracies": 0.625, "rewards/chosen": -0.05970001220703125, "rewards/margins": 0.00041961669921875, "rewards/rejected": -0.06011962890625, "step": 467 }, { "epoch": 0.2526486267629395, "grad_norm": 0.20718205099815606, "learning_rate": 1.7104136847581892e-05, "log_odds_chosen": 0.05694580078125, "log_odds_ratio": -0.666748046875, "logits/chosen": 0.623321533203125, "logits/rejected": 0.7111129760742188, "logps/chosen": -0.633544921875, "logps/rejected": -0.64990234375, "loss": 9.8506, "nll_loss": 0.6407470703125, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0633544921875, "rewards/margins": 0.00164794921875, "rewards/rejected": -0.06500244140625, "step": 468 }, { "epoch": 0.2531884742560227, "grad_norm": 0.2073557940826452, "learning_rate": 1.70921297676172e-05, "log_odds_chosen": 0.089111328125, "log_odds_ratio": -0.651123046875, "logits/chosen": 0.4581298828125, "logits/rejected": 0.5158376693725586, "logps/chosen": -0.473876953125, "logps/rejected": -0.503173828125, "loss": 9.7549, "nll_loss": 0.4786376953125, "rewards/accuracies": 0.6875, "rewards/chosen": -0.04738616943359375, "rewards/margins": 0.00292205810546875, "rewards/rejected": -0.0503082275390625, "step": 469 }, { "epoch": 0.25372832174910587, "grad_norm": 0.1908295669413542, "learning_rate": 1.7080102080136623e-05, "log_odds_chosen": 0.0643310546875, "log_odds_ratio": -0.66455078125, "logits/chosen": 0.353912353515625, "logits/rejected": 0.41790008544921875, "logps/chosen": -0.4610595703125, "logps/rejected": -0.481689453125, "loss": 9.2271, "nll_loss": 0.469970703125, "rewards/accuracies": 0.8125, "rewards/chosen": -0.046112060546875, "rewards/margins": 0.0020904541015625, "rewards/rejected": -0.0482025146484375, "step": 470 }, { "epoch": 0.2542681692421891, "grad_norm": 0.19881944791931322, "learning_rate": 1.706805382008887e-05, "log_odds_chosen": 0.1649169921875, "log_odds_ratio": -0.61865234375, "logits/chosen": 0.1857452392578125, "logits/rejected": 0.2253570556640625, "logps/chosen": -0.4306640625, "logps/rejected": -0.4803466796875, "loss": 8.9951, "nll_loss": 0.4434814453125, "rewards/accuracies": 0.875, "rewards/chosen": -0.0430755615234375, "rewards/margins": 0.00494384765625, "rewards/rejected": -0.0480194091796875, "step": 471 }, { "epoch": 0.25480801673527226, "grad_norm": 0.19983818799818573, "learning_rate": 1.7055985022482426e-05, "log_odds_chosen": 0.10797119140625, "log_odds_ratio": -0.646728515625, "logits/chosen": 0.10845947265625, "logits/rejected": 0.296600341796875, "logps/chosen": -0.490478515625, "logps/rejected": -0.52117919921875, "loss": 9.1738, "nll_loss": 0.50445556640625, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0490264892578125, "rewards/margins": 0.0030975341796875, "rewards/rejected": -0.0521240234375, "step": 472 }, { "epoch": 0.2553478642283555, "grad_norm": 0.18939023721967557, "learning_rate": 1.7043895722385447e-05, "log_odds_chosen": 0.0703125, "log_odds_ratio": -0.665283203125, "logits/chosen": 0.2360992431640625, "logits/rejected": 0.291839599609375, "logps/chosen": -0.5087890625, "logps/rejected": -0.5194091796875, "loss": 8.8667, "nll_loss": 0.5115966796875, "rewards/accuracies": 0.625, "rewards/chosen": -0.05084228515625, "rewards/margins": 0.0010986328125, "rewards/rejected": -0.05194091796875, "step": 473 }, { "epoch": 0.2558877117214387, "grad_norm": 0.19524322005531283, "learning_rate": 1.7031785954925667e-05, "log_odds_chosen": 0.076171875, "log_odds_ratio": -0.658935546875, "logits/chosen": 0.2885093688964844, "logits/rejected": 0.3442840576171875, "logps/chosen": -0.4573974609375, "logps/rejected": -0.4786376953125, "loss": 8.9038, "nll_loss": 0.4600830078125, "rewards/accuracies": 0.5, "rewards/chosen": -0.0457611083984375, "rewards/margins": 0.0020751953125, "rewards/rejected": -0.0478363037109375, "step": 474 }, { "epoch": 0.25642755921452187, "grad_norm": 0.1898168853239426, "learning_rate": 1.701965575529029e-05, "log_odds_chosen": 0.0677490234375, "log_odds_ratio": -0.6650390625, "logits/chosen": 0.23109054565429688, "logits/rejected": 0.30889892578125, "logps/chosen": -0.467041015625, "logps/rejected": -0.4918212890625, "loss": 9.272, "nll_loss": 0.4688720703125, "rewards/accuracies": 0.625, "rewards/chosen": -0.04668426513671875, "rewards/margins": 0.00250244140625, "rewards/rejected": -0.04918670654296875, "step": 475 }, { "epoch": 0.2569674067076051, "grad_norm": 0.20335745132563318, "learning_rate": 1.7007505158725893e-05, "log_odds_chosen": 0.07342529296875, "log_odds_ratio": -0.6627197265625, "logits/chosen": 0.33282470703125, "logits/rejected": 0.4166259765625, "logps/chosen": -0.5872802734375, "logps/rejected": -0.6002197265625, "loss": 9.2578, "nll_loss": 0.59576416015625, "rewards/accuracies": 0.625, "rewards/chosen": -0.05875396728515625, "rewards/margins": 0.00128173828125, "rewards/rejected": -0.06003570556640625, "step": 476 }, { "epoch": 0.2575072542006883, "grad_norm": 0.21547866105509203, "learning_rate": 1.699533420053832e-05, "log_odds_chosen": 0.0909423828125, "log_odds_ratio": -0.6568603515625, "logits/chosen": 0.38407135009765625, "logits/rejected": 0.44183349609375, "logps/chosen": -0.5806884765625, "logps/rejected": -0.59130859375, "loss": 10.1665, "nll_loss": 0.6063232421875, "rewards/accuracies": 0.6875, "rewards/chosen": -0.05805206298828125, "rewards/margins": 0.00107574462890625, "rewards/rejected": -0.0591278076171875, "step": 477 }, { "epoch": 0.2580471016937715, "grad_norm": 0.18935816689817073, "learning_rate": 1.6983142916092572e-05, "log_odds_chosen": 0.1221923828125, "log_odds_ratio": -0.640869140625, "logits/chosen": 0.22306251525878906, "logits/rejected": 0.2716236114501953, "logps/chosen": -0.4298095703125, "logps/rejected": -0.4652099609375, "loss": 8.5493, "nll_loss": 0.44000244140625, "rewards/accuracies": 0.75, "rewards/chosen": -0.0429840087890625, "rewards/margins": 0.0035247802734375, "rewards/rejected": -0.0465087890625, "step": 478 }, { "epoch": 0.2585869491868547, "grad_norm": 0.20286691803086246, "learning_rate": 1.697093134081272e-05, "log_odds_chosen": 0.052734375, "log_odds_ratio": -0.669189453125, "logits/chosen": 0.565704345703125, "logits/rejected": 0.6176338195800781, "logps/chosen": -0.5101318359375, "logps/rejected": -0.5247802734375, "loss": 8.9365, "nll_loss": 0.5184326171875, "rewards/accuracies": 0.5625, "rewards/chosen": -0.05107879638671875, "rewards/margins": 0.00144195556640625, "rewards/rejected": -0.052520751953125, "step": 479 }, { "epoch": 0.25912679667993793, "grad_norm": 0.1825519888402762, "learning_rate": 1.6958699510181793e-05, "log_odds_chosen": 0.0316162109375, "log_odds_ratio": -0.6845703125, "logits/chosen": 0.44023019075393677, "logits/rejected": 0.5654983520507812, "logps/chosen": -0.607421875, "logps/rejected": -0.595947265625, "loss": 8.2515, "nll_loss": 0.6190185546875, "rewards/accuracies": 0.5, "rewards/chosen": -0.06073760986328125, "rewards/margins": -0.00115203857421875, "rewards/rejected": -0.0595855712890625, "step": 480 }, { "epoch": 0.2596666441730211, "grad_norm": 0.20207473413876717, "learning_rate": 1.694644745974167e-05, "log_odds_chosen": 0.060791015625, "log_odds_ratio": -0.667236328125, "logits/chosen": 0.1978302001953125, "logits/rejected": 0.21152114868164062, "logps/chosen": -0.427490234375, "logps/rejected": -0.4520263671875, "loss": 8.9829, "nll_loss": 0.4317626953125, "rewards/accuracies": 0.6875, "rewards/chosen": -0.04276275634765625, "rewards/margins": 0.002471923828125, "rewards/rejected": -0.04523468017578125, "step": 481 }, { "epoch": 0.2602064916661043, "grad_norm": 0.2060828662222619, "learning_rate": 1.6934175225092985e-05, "log_odds_chosen": 0.0880126953125, "log_odds_ratio": -0.6585693359375, "logits/chosen": 0.15130615234375, "logits/rejected": 0.128662109375, "logps/chosen": -0.543701171875, "logps/rejected": -0.5965576171875, "loss": 9.6919, "nll_loss": 0.5445556640625, "rewards/accuracies": 0.625, "rewards/chosen": -0.054351806640625, "rewards/margins": 0.00530242919921875, "rewards/rejected": -0.05965423583984375, "step": 482 }, { "epoch": 0.26074633915918755, "grad_norm": 0.19958990324453796, "learning_rate": 1.6921882841895027e-05, "log_odds_chosen": 0.10791015625, "log_odds_ratio": -0.64208984375, "logits/chosen": 0.4562082290649414, "logits/rejected": 0.454071044921875, "logps/chosen": -0.499755859375, "logps/rejected": -0.53564453125, "loss": 9.6943, "nll_loss": 0.5052490234375, "rewards/accuracies": 0.8125, "rewards/chosen": -0.04998016357421875, "rewards/margins": 0.00360870361328125, "rewards/rejected": -0.0535888671875, "step": 483 }, { "epoch": 0.2612861866522707, "grad_norm": 0.18756962041654837, "learning_rate": 1.6909570345865622e-05, "log_odds_chosen": 0.178955078125, "log_odds_ratio": -0.6148681640625, "logits/chosen": 0.34548187255859375, "logits/rejected": 0.3906841278076172, "logps/chosen": -0.444091796875, "logps/rejected": -0.5089111328125, "loss": 8.5791, "nll_loss": 0.4488525390625, "rewards/accuracies": 0.75, "rewards/chosen": -0.044403076171875, "rewards/margins": 0.0064849853515625, "rewards/rejected": -0.0508880615234375, "step": 484 }, { "epoch": 0.26182603414535394, "grad_norm": 0.18834520521438505, "learning_rate": 1.6897237772781046e-05, "log_odds_chosen": 0.1783447265625, "log_odds_ratio": -0.6126708984375, "logits/chosen": 0.1951904296875, "logits/rejected": 0.2748260498046875, "logps/chosen": -0.44866943359375, "logps/rejected": -0.50537109375, "loss": 8.7661, "nll_loss": 0.4495849609375, "rewards/accuracies": 0.8125, "rewards/chosen": -0.04483795166015625, "rewards/margins": 0.00566864013671875, "rewards/rejected": -0.050506591796875, "step": 485 }, { "epoch": 0.26236588163843716, "grad_norm": 0.19039831784128308, "learning_rate": 1.6884885158475897e-05, "log_odds_chosen": 0.053466796875, "log_odds_ratio": -0.669189453125, "logits/chosen": 0.5151901245117188, "logits/rejected": 0.6440505981445312, "logps/chosen": -0.5169677734375, "logps/rejected": -0.5321044921875, "loss": 8.7114, "nll_loss": 0.5220947265625, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0516815185546875, "rewards/margins": 0.0015106201171875, "rewards/rejected": -0.053192138671875, "step": 486 }, { "epoch": 0.2629057291315203, "grad_norm": 0.20512398786237368, "learning_rate": 1.687251253884303e-05, "log_odds_chosen": 0.0450439453125, "log_odds_ratio": -0.673583984375, "logits/chosen": 0.28485107421875, "logits/rejected": 0.4578857421875, "logps/chosen": -0.5810546875, "logps/rejected": -0.5889892578125, "loss": 9.9927, "nll_loss": 0.6041259765625, "rewards/accuracies": 0.6875, "rewards/chosen": -0.05811309814453125, "rewards/margins": 0.00078582763671875, "rewards/rejected": -0.05889892578125, "step": 487 }, { "epoch": 0.26344557662460355, "grad_norm": 0.2048240991014516, "learning_rate": 1.686011994983341e-05, "log_odds_chosen": 0.095458984375, "log_odds_ratio": -0.6492919921875, "logits/chosen": 0.5394287109375, "logits/rejected": 0.634490966796875, "logps/chosen": -0.4908447265625, "logps/rejected": -0.5225830078125, "loss": 9.4937, "nll_loss": 0.4993896484375, "rewards/accuracies": 0.625, "rewards/chosen": -0.0490875244140625, "rewards/margins": 0.0031585693359375, "rewards/rejected": -0.05224609375, "step": 488 }, { "epoch": 0.2639854241176868, "grad_norm": 0.19586883396400875, "learning_rate": 1.684770742745603e-05, "log_odds_chosen": 0.08154296875, "log_odds_ratio": -0.6668701171875, "logits/chosen": 0.25931549072265625, "logits/rejected": 0.400390625, "logps/chosen": -0.5413818359375, "logps/rejected": -0.55316162109375, "loss": 8.311, "nll_loss": 0.545166015625, "rewards/accuracies": 0.6875, "rewards/chosen": -0.05414581298828125, "rewards/margins": 0.0011444091796875, "rewards/rejected": -0.05529022216796875, "step": 489 }, { "epoch": 0.26452527161076994, "grad_norm": 0.21136477906307385, "learning_rate": 1.6835275007777815e-05, "log_odds_chosen": 0.1044921875, "log_odds_ratio": -0.6484375, "logits/chosen": 0.3000030517578125, "logits/rejected": 0.34918212890625, "logps/chosen": -0.425048828125, "logps/rejected": -0.458740234375, "loss": 9.4653, "nll_loss": 0.437744140625, "rewards/accuracies": 0.75, "rewards/chosen": -0.04251861572265625, "rewards/margins": 0.00336456298828125, "rewards/rejected": -0.0458831787109375, "step": 490 }, { "epoch": 0.26506511910385316, "grad_norm": 0.20109592293733897, "learning_rate": 1.682282272692349e-05, "log_odds_chosen": 0.0008544921875, "log_odds_ratio": -0.70361328125, "logits/chosen": 0.2806910276412964, "logits/rejected": 0.32452392578125, "logps/chosen": -0.49334716796875, "logps/rejected": -0.4859619140625, "loss": 9.0396, "nll_loss": 0.505615234375, "rewards/accuracies": 0.5625, "rewards/chosen": -0.04935455322265625, "rewards/margins": -0.0007476806640625, "rewards/rejected": -0.04860687255859375, "step": 491 }, { "epoch": 0.2656049665969364, "grad_norm": 0.1959281923989542, "learning_rate": 1.68103506210755e-05, "log_odds_chosen": 0.1512451171875, "log_odds_ratio": -0.630615234375, "logits/chosen": 0.2223968505859375, "logits/rejected": 0.2709197998046875, "logps/chosen": -0.53759765625, "logps/rejected": -0.5777587890625, "loss": 9.5547, "nll_loss": 0.537109375, "rewards/accuracies": 0.75, "rewards/chosen": -0.0537567138671875, "rewards/margins": 0.0040283203125, "rewards/rejected": -0.0577850341796875, "step": 492 }, { "epoch": 0.26614481409001955, "grad_norm": 0.2083354390415315, "learning_rate": 1.6797858726473893e-05, "log_odds_chosen": 0.2310791015625, "log_odds_ratio": -0.5943603515625, "logits/chosen": 0.3614501953125, "logits/rejected": 0.40753936767578125, "logps/chosen": -0.5068359375, "logps/rejected": -0.5771484375, "loss": 9.8726, "nll_loss": 0.5137939453125, "rewards/accuracies": 0.8125, "rewards/chosen": -0.05072021484375, "rewards/margins": 0.006988525390625, "rewards/rejected": -0.057708740234375, "step": 493 }, { "epoch": 0.2666846615831028, "grad_norm": 0.18469317743414593, "learning_rate": 1.678534707941622e-05, "log_odds_chosen": 0.093017578125, "log_odds_ratio": -0.652099609375, "logits/chosen": 0.3053007125854492, "logits/rejected": 0.41160058975219727, "logps/chosen": -0.5306396484375, "logps/rejected": -0.5518798828125, "loss": 8.7505, "nll_loss": 0.53857421875, "rewards/accuracies": 0.625, "rewards/chosen": -0.05303955078125, "rewards/margins": 0.0021209716796875, "rewards/rejected": -0.0551605224609375, "step": 494 }, { "epoch": 0.267224509076186, "grad_norm": 0.20465600911273704, "learning_rate": 1.6772815716257414e-05, "log_odds_chosen": 0.1431884765625, "log_odds_ratio": -0.6339111328125, "logits/chosen": 0.3443603515625, "logits/rejected": 0.3033599853515625, "logps/chosen": -0.5546875, "logps/rejected": -0.6241455078125, "loss": 9.3896, "nll_loss": 0.5596923828125, "rewards/accuracies": 0.8125, "rewards/chosen": -0.0554962158203125, "rewards/margins": 0.006927490234375, "rewards/rejected": -0.0624237060546875, "step": 495 }, { "epoch": 0.26776435656926917, "grad_norm": 0.20748003041036991, "learning_rate": 1.6760264673409707e-05, "log_odds_chosen": 0.07489013671875, "log_odds_ratio": -0.6668701171875, "logits/chosen": -0.015106201171875, "logits/rejected": 0.12465667724609375, "logps/chosen": -0.6790771484375, "logps/rejected": -0.6846923828125, "loss": 9.7993, "nll_loss": 0.67681884765625, "rewards/accuracies": 0.625, "rewards/chosen": -0.0679473876953125, "rewards/margins": 0.0005340576171875, "rewards/rejected": -0.0684814453125, "step": 496 }, { "epoch": 0.2683042040623524, "grad_norm": 0.2206309431911372, "learning_rate": 1.674769398734252e-05, "log_odds_chosen": 0.18994140625, "log_odds_ratio": -0.6102294921875, "logits/chosen": 0.26346588134765625, "logits/rejected": 0.2653350830078125, "logps/chosen": -0.5361328125, "logps/rejected": -0.6021728515625, "loss": 10.1299, "nll_loss": 0.543212890625, "rewards/accuracies": 0.75, "rewards/chosen": -0.0535888671875, "rewards/margins": 0.0066070556640625, "rewards/rejected": -0.0601959228515625, "step": 497 }, { "epoch": 0.2688440515554356, "grad_norm": 0.26531030953362267, "learning_rate": 1.673510369458234e-05, "log_odds_chosen": 0.0693359375, "log_odds_ratio": -0.67822265625, "logits/chosen": 0.1156158447265625, "logits/rejected": 0.142913818359375, "logps/chosen": -0.6507568359375, "logps/rejected": -0.6556396484375, "loss": 10.7852, "nll_loss": 0.6812744140625, "rewards/accuracies": 0.5625, "rewards/chosen": -0.065093994140625, "rewards/margins": 0.000457763671875, "rewards/rejected": -0.0655517578125, "step": 498 }, { "epoch": 0.2693838990485188, "grad_norm": 0.19615586285245162, "learning_rate": 1.6722493831712627e-05, "log_odds_chosen": 0.0904541015625, "log_odds_ratio": -0.658935546875, "logits/chosen": 0.5332565307617188, "logits/rejected": 0.6417160034179688, "logps/chosen": -0.5416259765625, "logps/rejected": -0.578857421875, "loss": 9.457, "nll_loss": 0.55517578125, "rewards/accuracies": 0.8125, "rewards/chosen": -0.0541839599609375, "rewards/margins": 0.00374603271484375, "rewards/rejected": -0.05792999267578125, "step": 499 }, { "epoch": 0.269923746541602, "grad_norm": 0.28954736459203867, "learning_rate": 1.670986443537371e-05, "log_odds_chosen": 0.20556640625, "log_odds_ratio": -0.5999755859375, "logits/chosen": 0.3411865234375, "logits/rejected": 0.385986328125, "logps/chosen": -0.43951416015625, "logps/rejected": -0.509765625, "loss": 9.293, "nll_loss": 0.4439697265625, "rewards/accuracies": 0.9375, "rewards/chosen": -0.04393768310546875, "rewards/margins": 0.00705718994140625, "rewards/rejected": -0.050994873046875, "step": 500 }, { "epoch": 0.27046359403468523, "grad_norm": 0.2166006490004533, "learning_rate": 1.6697215542262674e-05, "log_odds_chosen": 0.029541015625, "log_odds_ratio": -0.6900634765625, "logits/chosen": 0.39255523681640625, "logits/rejected": 0.508758544921875, "logps/chosen": -0.633056640625, "logps/rejected": -0.6258544921875, "loss": 9.2266, "nll_loss": 0.6290283203125, "rewards/accuracies": 0.5625, "rewards/chosen": -0.06329345703125, "rewards/margins": -0.0006866455078125, "rewards/rejected": -0.0626068115234375, "step": 501 }, { "epoch": 0.2710034415277684, "grad_norm": 0.23025630456008964, "learning_rate": 1.6684547189133253e-05, "log_odds_chosen": -0.03662109375, "log_odds_ratio": -0.72119140625, "logits/chosen": 0.30062103271484375, "logits/rejected": 0.46958160400390625, "logps/chosen": -0.7352294921875, "logps/rejected": -0.6895751953125, "loss": 11.2158, "nll_loss": 0.7353515625, "rewards/accuracies": 0.5, "rewards/chosen": -0.07352447509765625, "rewards/margins": -0.00457000732421875, "rewards/rejected": -0.0689544677734375, "step": 502 }, { "epoch": 0.2715432890208516, "grad_norm": 0.18992599062883544, "learning_rate": 1.6671859412795727e-05, "log_odds_chosen": 0.013427734375, "log_odds_ratio": -0.69580078125, "logits/chosen": 0.4159698486328125, "logits/rejected": 0.4966900944709778, "logps/chosen": -0.5833740234375, "logps/rejected": -0.56884765625, "loss": 9.459, "nll_loss": 0.58935546875, "rewards/accuracies": 0.5625, "rewards/chosen": -0.05829620361328125, "rewards/margins": -0.0014190673828125, "rewards/rejected": -0.05687713623046875, "step": 503 }, { "epoch": 0.2720831365139348, "grad_norm": 0.20569110846438893, "learning_rate": 1.665915225011681e-05, "log_odds_chosen": 0.0364990234375, "log_odds_ratio": -0.677490234375, "logits/chosen": 0.40761756896972656, "logits/rejected": 0.5388412475585938, "logps/chosen": -0.49560546875, "logps/rejected": -0.5047607421875, "loss": 9.4077, "nll_loss": 0.5009765625, "rewards/accuracies": 0.5625, "rewards/chosen": -0.049560546875, "rewards/margins": 0.0009002685546875, "rewards/rejected": -0.0504608154296875, "step": 504 }, { "epoch": 0.272622984007018, "grad_norm": 0.1956744908677378, "learning_rate": 1.6646425738019565e-05, "log_odds_chosen": 0.0086669921875, "log_odds_ratio": -0.697509765625, "logits/chosen": 0.09163665771484375, "logits/rejected": 0.20120763778686523, "logps/chosen": -0.505615234375, "logps/rejected": -0.4991455078125, "loss": 8.936, "nll_loss": 0.5142822265625, "rewards/accuracies": 0.5, "rewards/chosen": -0.050506591796875, "rewards/margins": -0.00055694580078125, "rewards/rejected": -0.04994964599609375, "step": 505 }, { "epoch": 0.27316283150010123, "grad_norm": 0.1901672531291144, "learning_rate": 1.6633679913483253e-05, "log_odds_chosen": 0.04443359375, "log_odds_ratio": -0.67431640625, "logits/chosen": 0.478485107421875, "logits/rejected": 0.5364990234375, "logps/chosen": -0.4580078125, "logps/rejected": -0.468017578125, "loss": 9.4453, "nll_loss": 0.4757080078125, "rewards/accuracies": 0.5625, "rewards/chosen": -0.04579925537109375, "rewards/margins": 0.00101470947265625, "rewards/rejected": -0.04681396484375, "step": 506 }, { "epoch": 0.2737026789931844, "grad_norm": 0.20076605187769594, "learning_rate": 1.662091481354327e-05, "log_odds_chosen": 0.083251953125, "log_odds_ratio": -0.654541015625, "logits/chosen": 0.319915771484375, "logits/rejected": 0.4010031223297119, "logps/chosen": -0.44671630859375, "logps/rejected": -0.471435546875, "loss": 9.5913, "nll_loss": 0.4561767578125, "rewards/accuracies": 0.75, "rewards/chosen": -0.044677734375, "rewards/margins": 0.0024566650390625, "rewards/rejected": -0.0471343994140625, "step": 507 }, { "epoch": 0.2742425264862676, "grad_norm": 0.1875047541675762, "learning_rate": 1.6608130475291007e-05, "log_odds_chosen": 0.056884765625, "log_odds_ratio": -0.669677734375, "logits/chosen": 0.32666015625, "logits/rejected": 0.36468505859375, "logps/chosen": -0.4697265625, "logps/rejected": -0.476806640625, "loss": 8.6313, "nll_loss": 0.4716796875, "rewards/accuracies": 0.625, "rewards/chosen": -0.0469818115234375, "rewards/margins": 0.000701904296875, "rewards/rejected": -0.0476837158203125, "step": 508 }, { "epoch": 0.27478237397935085, "grad_norm": 0.19078453650550725, "learning_rate": 1.6595326935873762e-05, "log_odds_chosen": -0.02008056640625, "log_odds_ratio": -0.705078125, "logits/chosen": 0.6814804077148438, "logits/rejected": 0.7131805419921875, "logps/chosen": -0.644775390625, "logps/rejected": -0.6234130859375, "loss": 9.0229, "nll_loss": 0.6510009765625, "rewards/accuracies": 0.5, "rewards/chosen": -0.06446075439453125, "rewards/margins": -0.00212860107421875, "rewards/rejected": -0.0623321533203125, "step": 509 }, { "epoch": 0.275322221472434, "grad_norm": 0.1876878897116015, "learning_rate": 1.658250423249463e-05, "log_odds_chosen": 0.07257080078125, "log_odds_ratio": -0.658935546875, "logits/chosen": 0.132080078125, "logits/rejected": 0.236907958984375, "logps/chosen": -0.46478271484375, "logps/rejected": -0.482421875, "loss": 9.3042, "nll_loss": 0.470947265625, "rewards/accuracies": 0.6875, "rewards/chosen": -0.04648590087890625, "rewards/margins": 0.0017852783203125, "rewards/rejected": -0.04827117919921875, "step": 510 }, { "epoch": 0.27586206896551724, "grad_norm": 0.19357455542640434, "learning_rate": 1.656966240241238e-05, "log_odds_chosen": -0.00927734375, "log_odds_ratio": -0.69873046875, "logits/chosen": 0.36041259765625, "logits/rejected": 0.4908294677734375, "logps/chosen": -0.443115234375, "logps/rejected": -0.435791015625, "loss": 9.002, "nll_loss": 0.4661865234375, "rewards/accuracies": 0.4375, "rewards/chosen": -0.0443115234375, "rewards/margins": -0.00072479248046875, "rewards/rejected": -0.04358673095703125, "step": 511 }, { "epoch": 0.27640191645860046, "grad_norm": 0.1957426320551066, "learning_rate": 1.6556801482941364e-05, "log_odds_chosen": 0.0283203125, "log_odds_ratio": -0.689453125, "logits/chosen": 0.5751953125, "logits/rejected": 0.7048988342285156, "logps/chosen": -0.6507568359375, "logps/rejected": -0.6263427734375, "loss": 9.6987, "nll_loss": 0.66607666015625, "rewards/accuracies": 0.625, "rewards/chosen": -0.06507110595703125, "rewards/margins": -0.00241851806640625, "rewards/rejected": -0.062652587890625, "step": 512 }, { "epoch": 0.27694176395168363, "grad_norm": 0.21075300941329483, "learning_rate": 1.6543921511451402e-05, "log_odds_chosen": 0.036865234375, "log_odds_ratio": -0.677001953125, "logits/chosen": 0.3001251220703125, "logits/rejected": 0.4490680694580078, "logps/chosen": -0.465087890625, "logps/rejected": -0.47412109375, "loss": 9.3457, "nll_loss": 0.4716796875, "rewards/accuracies": 0.5, "rewards/chosen": -0.0465087890625, "rewards/margins": 0.00091552734375, "rewards/rejected": -0.04742431640625, "step": 513 }, { "epoch": 0.27748161144476685, "grad_norm": 0.21738015146571188, "learning_rate": 1.6531022525367674e-05, "log_odds_chosen": 0.0196533203125, "log_odds_ratio": -0.68505859375, "logits/chosen": 0.41680145263671875, "logits/rejected": 0.47452545166015625, "logps/chosen": -0.478271484375, "logps/rejected": -0.4805908203125, "loss": 10.2422, "nll_loss": 0.493896484375, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0478363037109375, "rewards/margins": 0.000244140625, "rewards/rejected": -0.0480804443359375, "step": 514 }, { "epoch": 0.2780214589378501, "grad_norm": 0.1819350328677368, "learning_rate": 1.65181045621706e-05, "log_odds_chosen": 0.0523681640625, "log_odds_ratio": -0.6700439453125, "logits/chosen": 0.2684783935546875, "logits/rejected": 0.33875274658203125, "logps/chosen": -0.470703125, "logps/rejected": -0.4862060546875, "loss": 8.5962, "nll_loss": 0.4818115234375, "rewards/accuracies": 0.625, "rewards/chosen": -0.04703521728515625, "rewards/margins": 0.00157928466796875, "rewards/rejected": -0.048614501953125, "step": 515 }, { "epoch": 0.27856130643093324, "grad_norm": 0.19414175409401255, "learning_rate": 1.6505167659395758e-05, "log_odds_chosen": 0.0762939453125, "log_odds_ratio": -0.656005859375, "logits/chosen": 0.17645263671875, "logits/rejected": 0.2757110595703125, "logps/chosen": -0.4002685546875, "logps/rejected": -0.423583984375, "loss": 9.1016, "nll_loss": 0.4091796875, "rewards/accuracies": 0.75, "rewards/chosen": -0.040008544921875, "rewards/margins": 0.0023651123046875, "rewards/rejected": -0.0423736572265625, "step": 516 }, { "epoch": 0.27910115392401647, "grad_norm": 0.23037219973188686, "learning_rate": 1.6492211854633748e-05, "log_odds_chosen": -0.02532958984375, "log_odds_ratio": -0.708740234375, "logits/chosen": 0.4398612976074219, "logits/rejected": 0.69915771484375, "logps/chosen": -0.6923828125, "logps/rejected": -0.6671142578125, "loss": 10.7827, "nll_loss": 0.713623046875, "rewards/accuracies": 0.3125, "rewards/chosen": -0.0692901611328125, "rewards/margins": -0.0026092529296875, "rewards/rejected": -0.066680908203125, "step": 517 }, { "epoch": 0.2796410014170997, "grad_norm": 0.1804287268299922, "learning_rate": 1.6479237185530093e-05, "log_odds_chosen": 0.04046630859375, "log_odds_ratio": -0.675537109375, "logits/chosen": 0.44376182556152344, "logits/rejected": 0.506591796875, "logps/chosen": -0.45184326171875, "logps/rejected": -0.4576416015625, "loss": 8.4321, "nll_loss": 0.45989990234375, "rewards/accuracies": 0.5, "rewards/chosen": -0.045166015625, "rewards/margins": 0.0005950927734375, "rewards/rejected": -0.0457611083984375, "step": 518 }, { "epoch": 0.28018084891018286, "grad_norm": 0.21286356845613058, "learning_rate": 1.6466243689785136e-05, "log_odds_chosen": 0.0391845703125, "log_odds_ratio": -0.677490234375, "logits/chosen": 0.23185884952545166, "logits/rejected": 0.3465728759765625, "logps/chosen": -0.4766845703125, "logps/rejected": -0.4737548828125, "loss": 10.4312, "nll_loss": 0.493896484375, "rewards/accuracies": 0.5625, "rewards/chosen": -0.04769134521484375, "rewards/margins": -0.0002899169921875, "rewards/rejected": -0.04740142822265625, "step": 519 }, { "epoch": 0.2807206964032661, "grad_norm": 0.20938324230135782, "learning_rate": 1.645323140515392e-05, "log_odds_chosen": 0.0130615234375, "log_odds_ratio": -0.68994140625, "logits/chosen": 0.11565399169921875, "logits/rejected": 0.140838623046875, "logps/chosen": -0.3890380859375, "logps/rejected": -0.39129638671875, "loss": 9.5269, "nll_loss": 0.3916015625, "rewards/accuracies": 0.4375, "rewards/chosen": -0.0388641357421875, "rewards/margins": 0.0002593994140625, "rewards/rejected": -0.03912353515625, "step": 520 }, { "epoch": 0.2812605438963493, "grad_norm": 0.19044557229996598, "learning_rate": 1.6440200369446077e-05, "log_odds_chosen": 0.08001708984375, "log_odds_ratio": -0.655029296875, "logits/chosen": 0.19544219970703125, "logits/rejected": 0.2988548278808594, "logps/chosen": -0.44232177734375, "logps/rejected": -0.4659423828125, "loss": 9.4346, "nll_loss": 0.4439697265625, "rewards/accuracies": 0.875, "rewards/chosen": -0.0442352294921875, "rewards/margins": 0.002349853515625, "rewards/rejected": -0.0465850830078125, "step": 521 }, { "epoch": 0.28180039138943247, "grad_norm": 0.19724550272745386, "learning_rate": 1.6427150620525736e-05, "log_odds_chosen": 0.0438232421875, "log_odds_ratio": -0.67431640625, "logits/chosen": 0.2436981201171875, "logits/rejected": 0.29461669921875, "logps/chosen": -0.46929931640625, "logps/rejected": -0.4793701171875, "loss": 8.6182, "nll_loss": 0.471435546875, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0469207763671875, "rewards/margins": 0.00101470947265625, "rewards/rejected": -0.04793548583984375, "step": 522 }, { "epoch": 0.2823402388825157, "grad_norm": 0.17699546855393522, "learning_rate": 1.6414082196311402e-05, "log_odds_chosen": 0.101806640625, "log_odds_ratio": -0.6461181640625, "logits/chosen": 0.554901123046875, "logits/rejected": 0.645599365234375, "logps/chosen": -0.4779052734375, "logps/rejected": -0.5091552734375, "loss": 9.3716, "nll_loss": 0.49072265625, "rewards/accuracies": 0.625, "rewards/chosen": -0.04776763916015625, "rewards/margins": 0.00313568115234375, "rewards/rejected": -0.0509033203125, "step": 523 }, { "epoch": 0.2828800863755989, "grad_norm": 0.20149687250490056, "learning_rate": 1.6400995134775824e-05, "log_odds_chosen": 0.1036376953125, "log_odds_ratio": -0.6466064453125, "logits/chosen": 0.30109405517578125, "logits/rejected": 0.30364990234375, "logps/chosen": -0.439453125, "logps/rejected": -0.47216796875, "loss": 8.7871, "nll_loss": 0.44781494140625, "rewards/accuracies": 0.6875, "rewards/chosen": -0.04393768310546875, "rewards/margins": 0.00327301025390625, "rewards/rejected": -0.047210693359375, "step": 524 }, { "epoch": 0.2834199338686821, "grad_norm": 0.21681329127982787, "learning_rate": 1.6387889473945925e-05, "log_odds_chosen": 0.04412841796875, "log_odds_ratio": -0.67236328125, "logits/chosen": 0.489044189453125, "logits/rejected": 0.5678482055664062, "logps/chosen": -0.47784423828125, "logps/rejected": -0.490234375, "loss": 9.8416, "nll_loss": 0.497314453125, "rewards/accuracies": 0.5625, "rewards/chosen": -0.04778289794921875, "rewards/margins": 0.00121307373046875, "rewards/rejected": -0.0489959716796875, "step": 525 }, { "epoch": 0.2839597813617653, "grad_norm": 0.1963954299102166, "learning_rate": 1.637476525190267e-05, "log_odds_chosen": 0.0833740234375, "log_odds_ratio": -0.6568603515625, "logits/chosen": 0.1913909912109375, "logits/rejected": 0.2852783203125, "logps/chosen": -0.5335693359375, "logps/rejected": -0.5576171875, "loss": 9.0684, "nll_loss": 0.5377197265625, "rewards/accuracies": 0.625, "rewards/chosen": -0.05338287353515625, "rewards/margins": 0.00238800048828125, "rewards/rejected": -0.0557708740234375, "step": 526 }, { "epoch": 0.28449962885484853, "grad_norm": 0.2147763099934138, "learning_rate": 1.6361622506780947e-05, "log_odds_chosen": 0.0489501953125, "log_odds_ratio": -0.6708984375, "logits/chosen": 0.309783935546875, "logits/rejected": 0.4287109375, "logps/chosen": -0.4334716796875, "logps/rejected": -0.4453125, "loss": 9.5112, "nll_loss": 0.4420166015625, "rewards/accuracies": 0.6875, "rewards/chosen": -0.04337310791015625, "rewards/margins": 0.001129150390625, "rewards/rejected": -0.04450225830078125, "step": 527 }, { "epoch": 0.2850394763479317, "grad_norm": 0.17719723233124268, "learning_rate": 1.634846127676947e-05, "log_odds_chosen": 0.03521728515625, "log_odds_ratio": -0.677734375, "logits/chosen": 0.19550323486328125, "logits/rejected": 0.3749570846557617, "logps/chosen": -0.42596435546875, "logps/rejected": -0.433349609375, "loss": 8.2812, "nll_loss": 0.43914794921875, "rewards/accuracies": 0.6875, "rewards/chosen": -0.04261016845703125, "rewards/margins": 0.0007171630859375, "rewards/rejected": -0.04332733154296875, "step": 528 }, { "epoch": 0.2855793238410149, "grad_norm": 0.17482913008893325, "learning_rate": 1.633528160011067e-05, "log_odds_chosen": 0.092529296875, "log_odds_ratio": -0.6494140625, "logits/chosen": 0.3133087158203125, "logits/rejected": 0.3741302490234375, "logps/chosen": -0.4547119140625, "logps/rejected": -0.4822998046875, "loss": 8.3208, "nll_loss": 0.4659423828125, "rewards/accuracies": 0.8125, "rewards/chosen": -0.0454864501953125, "rewards/margins": 0.0027313232421875, "rewards/rejected": -0.0482177734375, "step": 529 }, { "epoch": 0.28611917133409814, "grad_norm": 0.19154370537153376, "learning_rate": 1.6322083515100574e-05, "log_odds_chosen": 0.0057373046875, "log_odds_ratio": -0.691650390625, "logits/chosen": 0.59136962890625, "logits/rejected": 0.7188882827758789, "logps/chosen": -0.51220703125, "logps/rejected": -0.5086669921875, "loss": 9.4146, "nll_loss": 0.5262451171875, "rewards/accuracies": 0.625, "rewards/chosen": -0.0512542724609375, "rewards/margins": -0.0003814697265625, "rewards/rejected": -0.050872802734375, "step": 530 }, { "epoch": 0.2866590188271813, "grad_norm": 0.21162123663145058, "learning_rate": 1.63088670600887e-05, "log_odds_chosen": 0.012939453125, "log_odds_ratio": -0.688720703125, "logits/chosen": 0.4748077392578125, "logits/rejected": 0.5231170654296875, "logps/chosen": -0.6190185546875, "logps/rejected": -0.6259765625, "loss": 11.1553, "nll_loss": 0.6190185546875, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0619354248046875, "rewards/margins": 0.0006561279296875, "rewards/rejected": -0.062591552734375, "step": 531 }, { "epoch": 0.28719886632026453, "grad_norm": 0.1965710253535175, "learning_rate": 1.6295632273477937e-05, "log_odds_chosen": 0.049072265625, "log_odds_ratio": -0.67138671875, "logits/chosen": 0.48796844482421875, "logits/rejected": 0.615386962890625, "logps/chosen": -0.49755859375, "logps/rejected": -0.51171875, "loss": 8.8179, "nll_loss": 0.50244140625, "rewards/accuracies": 0.625, "rewards/chosen": -0.049774169921875, "rewards/margins": 0.001434326171875, "rewards/rejected": -0.05120849609375, "step": 532 }, { "epoch": 0.2877387138133477, "grad_norm": 0.19439643562807288, "learning_rate": 1.628237919372445e-05, "log_odds_chosen": 0.00909423828125, "log_odds_ratio": -0.693115234375, "logits/chosen": 0.4474678039550781, "logits/rejected": 0.5788211822509766, "logps/chosen": -0.57745361328125, "logps/rejected": -0.56201171875, "loss": 9.5122, "nll_loss": 0.5792236328125, "rewards/accuracies": 0.5, "rewards/chosen": -0.05776214599609375, "rewards/margins": -0.00157928466796875, "rewards/rejected": -0.056182861328125, "step": 533 }, { "epoch": 0.2882785613064309, "grad_norm": 0.19462530251811908, "learning_rate": 1.626910785933755e-05, "log_odds_chosen": 0.067626953125, "log_odds_ratio": -0.662109375, "logits/chosen": 0.40235137939453125, "logits/rejected": 0.4578742980957031, "logps/chosen": -0.47930908203125, "logps/rejected": -0.495361328125, "loss": 9.3477, "nll_loss": 0.48687744140625, "rewards/accuracies": 0.625, "rewards/chosen": -0.04793548583984375, "rewards/margins": 0.00159454345703125, "rewards/rejected": -0.049530029296875, "step": 534 }, { "epoch": 0.28881840879951415, "grad_norm": 0.2014240917394722, "learning_rate": 1.6255818308879594e-05, "log_odds_chosen": 0.105712890625, "log_odds_ratio": -0.64501953125, "logits/chosen": 0.29559326171875, "logits/rejected": 0.26922607421875, "logps/chosen": -0.468017578125, "logps/rejected": -0.495361328125, "loss": 10.165, "nll_loss": 0.4786376953125, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0468292236328125, "rewards/margins": 0.0027008056640625, "rewards/rejected": -0.049530029296875, "step": 535 }, { "epoch": 0.2893582562925973, "grad_norm": 0.1929883092705178, "learning_rate": 1.6242510580965865e-05, "log_odds_chosen": 0.042724609375, "log_odds_ratio": -0.67431640625, "logits/chosen": 0.18225860595703125, "logits/rejected": 0.2978553771972656, "logps/chosen": -0.50238037109375, "logps/rejected": -0.505126953125, "loss": 9.062, "nll_loss": 0.50482177734375, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0502471923828125, "rewards/margins": 0.00026702880859375, "rewards/rejected": -0.05051422119140625, "step": 536 }, { "epoch": 0.28989810378568054, "grad_norm": 0.21643551387997198, "learning_rate": 1.6229184714264473e-05, "log_odds_chosen": -0.000244140625, "log_odds_ratio": -0.700439453125, "logits/chosen": 0.16015625, "logits/rejected": 0.362091064453125, "logps/chosen": -0.6275634765625, "logps/rejected": -0.5921630859375, "loss": 11.3457, "nll_loss": 0.63287353515625, "rewards/accuracies": 0.625, "rewards/chosen": -0.06276702880859375, "rewards/margins": -0.00360107421875, "rewards/rejected": -0.05916595458984375, "step": 537 }, { "epoch": 0.29043795127876376, "grad_norm": 0.19994265851916046, "learning_rate": 1.6215840747496226e-05, "log_odds_chosen": 0.066650390625, "log_odds_ratio": -0.662353515625, "logits/chosen": 0.2718505859375, "logits/rejected": 0.38535308837890625, "logps/chosen": -0.4498291015625, "logps/rejected": -0.4698486328125, "loss": 8.6367, "nll_loss": 0.458984375, "rewards/accuracies": 0.5625, "rewards/chosen": -0.04499053955078125, "rewards/margins": 0.00197601318359375, "rewards/rejected": -0.046966552734375, "step": 538 }, { "epoch": 0.29097779877184693, "grad_norm": 0.19532922986058834, "learning_rate": 1.6202478719434524e-05, "log_odds_chosen": -0.03515625, "log_odds_ratio": -0.717041015625, "logits/chosen": 0.497344970703125, "logits/rejected": 0.5027618408203125, "logps/chosen": -0.610595703125, "logps/rejected": -0.57421875, "loss": 9.3433, "nll_loss": 0.625244140625, "rewards/accuracies": 0.5, "rewards/chosen": -0.061065673828125, "rewards/margins": -0.0036773681640625, "rewards/rejected": -0.0573883056640625, "step": 539 }, { "epoch": 0.29151764626493015, "grad_norm": 0.18897449237986003, "learning_rate": 1.6189098668905258e-05, "log_odds_chosen": 0.090087890625, "log_odds_ratio": -0.650634765625, "logits/chosen": 0.54107666015625, "logits/rejected": 0.577362060546875, "logps/chosen": -0.4798583984375, "logps/rejected": -0.5059814453125, "loss": 9.3765, "nll_loss": 0.485595703125, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0479736328125, "rewards/margins": 0.00262451171875, "rewards/rejected": -0.05059814453125, "step": 540 }, { "epoch": 0.2920574937580134, "grad_norm": 0.1774525570394893, "learning_rate": 1.6175700634786672e-05, "log_odds_chosen": 0.059326171875, "log_odds_ratio": -0.6650390625, "logits/chosen": 0.3441162109375, "logits/rejected": 0.42401123046875, "logps/chosen": -0.4830322265625, "logps/rejected": -0.5015869140625, "loss": 8.27, "nll_loss": 0.4840087890625, "rewards/accuracies": 0.6875, "rewards/chosen": -0.04830169677734375, "rewards/margins": 0.00185394287109375, "rewards/rejected": -0.0501556396484375, "step": 541 }, { "epoch": 0.29259734125109654, "grad_norm": 0.22648676270263718, "learning_rate": 1.6162284656009276e-05, "log_odds_chosen": 0.0322265625, "log_odds_ratio": -0.68115234375, "logits/chosen": 0.2905693054199219, "logits/rejected": 0.468536376953125, "logps/chosen": -0.6231689453125, "logps/rejected": -0.6158447265625, "loss": 10.665, "nll_loss": 0.6207275390625, "rewards/accuracies": 0.625, "rewards/chosen": -0.0623016357421875, "rewards/margins": -0.0007476806640625, "rewards/rejected": -0.061553955078125, "step": 542 }, { "epoch": 0.29313718874417977, "grad_norm": 0.19962620278755655, "learning_rate": 1.6148850771555715e-05, "log_odds_chosen": 0.12200927734375, "log_odds_ratio": -0.6363525390625, "logits/chosen": 0.24139404296875, "logits/rejected": 0.30107879638671875, "logps/chosen": -0.5078125, "logps/rejected": -0.5469970703125, "loss": 9.3232, "nll_loss": 0.51043701171875, "rewards/accuracies": 0.9375, "rewards/chosen": -0.0507965087890625, "rewards/margins": 0.00390625, "rewards/rejected": -0.0547027587890625, "step": 543 }, { "epoch": 0.293677036237263, "grad_norm": 0.20297005011920816, "learning_rate": 1.6135399020460666e-05, "log_odds_chosen": 0.1270751953125, "log_odds_ratio": -0.6337890625, "logits/chosen": 0.3323020935058594, "logits/rejected": 0.3997344970703125, "logps/chosen": -0.47509765625, "logps/rejected": -0.5150146484375, "loss": 10.5308, "nll_loss": 0.4871826171875, "rewards/accuracies": 0.75, "rewards/chosen": -0.04746246337890625, "rewards/margins": 0.00405120849609375, "rewards/rejected": -0.051513671875, "step": 544 }, { "epoch": 0.29421688373034616, "grad_norm": 0.19073953404237579, "learning_rate": 1.6121929441810723e-05, "log_odds_chosen": 0.0626220703125, "log_odds_ratio": -0.664794921875, "logits/chosen": 0.4970245361328125, "logits/rejected": 0.539154052734375, "logps/chosen": -0.5185546875, "logps/rejected": -0.5350341796875, "loss": 9.6323, "nll_loss": 0.5333251953125, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0518646240234375, "rewards/margins": 0.00159454345703125, "rewards/rejected": -0.05345916748046875, "step": 545 }, { "epoch": 0.2947567312234294, "grad_norm": 0.21259437340017578, "learning_rate": 1.6108442074744272e-05, "log_odds_chosen": 0.02899169921875, "log_odds_ratio": -0.687744140625, "logits/chosen": -0.003482341766357422, "logits/rejected": 0.09112548828125, "logps/chosen": -0.5301513671875, "logps/rejected": -0.51885986328125, "loss": 9.8188, "nll_loss": 0.5301513671875, "rewards/accuracies": 0.5625, "rewards/chosen": -0.05303192138671875, "rewards/margins": -0.00116729736328125, "rewards/rejected": -0.0518646240234375, "step": 546 }, { "epoch": 0.2952965787165126, "grad_norm": 0.19630941745658675, "learning_rate": 1.60949369584514e-05, "log_odds_chosen": 0.13330078125, "log_odds_ratio": -0.6322021484375, "logits/chosen": 0.120849609375, "logits/rejected": 0.1952667236328125, "logps/chosen": -0.412841796875, "logps/rejected": -0.4500732421875, "loss": 8.7793, "nll_loss": 0.4281005859375, "rewards/accuracies": 0.75, "rewards/chosen": -0.041309356689453125, "rewards/margins": 0.003704071044921875, "rewards/rejected": -0.045013427734375, "step": 547 }, { "epoch": 0.29583642620959577, "grad_norm": 0.1882228514896771, "learning_rate": 1.6081414132173755e-05, "log_odds_chosen": 0.06982421875, "log_odds_ratio": -0.661376953125, "logits/chosen": 0.2471923828125, "logits/rejected": 0.2653541564941406, "logps/chosen": -0.41632080078125, "logps/rejected": -0.43212890625, "loss": 9.3018, "nll_loss": 0.4232177734375, "rewards/accuracies": 0.625, "rewards/chosen": -0.0416412353515625, "rewards/margins": 0.00157928466796875, "rewards/rejected": -0.04322052001953125, "step": 548 }, { "epoch": 0.296376273702679, "grad_norm": 0.21628902256551444, "learning_rate": 1.6067873635204453e-05, "log_odds_chosen": -0.05859375, "log_odds_ratio": -0.73095703125, "logits/chosen": 0.15707778930664062, "logits/rejected": 0.27996826171875, "logps/chosen": -0.56298828125, "logps/rejected": -0.517578125, "loss": 9.6167, "nll_loss": 0.5728759765625, "rewards/accuracies": 0.5, "rewards/chosen": -0.05629730224609375, "rewards/margins": -0.00457000732421875, "rewards/rejected": -0.051727294921875, "step": 549 }, { "epoch": 0.2969161211957622, "grad_norm": 0.20282543673481274, "learning_rate": 1.605431550688795e-05, "log_odds_chosen": 0.0079345703125, "log_odds_ratio": -0.69287109375, "logits/chosen": 0.6076040267944336, "logits/rejected": 0.763885498046875, "logps/chosen": -0.58416748046875, "logps/rejected": -0.576171875, "loss": 9.8486, "nll_loss": 0.601806640625, "rewards/accuracies": 0.4375, "rewards/chosen": -0.05841827392578125, "rewards/margins": -0.00079345703125, "rewards/rejected": -0.05762481689453125, "step": 550 }, { "epoch": 0.2974559686888454, "grad_norm": 0.19599285111299683, "learning_rate": 1.604073978661994e-05, "log_odds_chosen": 0.0101318359375, "log_odds_ratio": -0.6904296875, "logits/chosen": 0.3373565673828125, "logits/rejected": 0.4164924621582031, "logps/chosen": -0.4781494140625, "logps/rejected": -0.478759765625, "loss": 9.4194, "nll_loss": 0.4989013671875, "rewards/accuracies": 0.5, "rewards/chosen": -0.04782867431640625, "rewards/margins": 6.103515625e-05, "rewards/rejected": -0.04788970947265625, "step": 551 }, { "epoch": 0.2979958161819286, "grad_norm": 0.2053372912646269, "learning_rate": 1.6027146513847222e-05, "log_odds_chosen": 0.06103515625, "log_odds_ratio": -0.664306640625, "logits/chosen": 0.46197509765625, "logits/rejected": 0.49730873107910156, "logps/chosen": -0.4549560546875, "logps/rejected": -0.4735107421875, "loss": 9.2822, "nll_loss": 0.4620361328125, "rewards/accuracies": 0.5, "rewards/chosen": -0.04549407958984375, "rewards/margins": 0.00185394287109375, "rewards/rejected": -0.0473480224609375, "step": 552 }, { "epoch": 0.29853566367501183, "grad_norm": 0.19384410050126416, "learning_rate": 1.601353572806761e-05, "log_odds_chosen": 0.0101318359375, "log_odds_ratio": -0.690185546875, "logits/chosen": 0.647216796875, "logits/rejected": 0.7181396484375, "logps/chosen": -0.54345703125, "logps/rejected": -0.5418701171875, "loss": 9.9375, "nll_loss": 0.55712890625, "rewards/accuracies": 0.5, "rewards/chosen": -0.05438232421875, "rewards/margins": -0.00018310546875, "rewards/rejected": -0.05419921875, "step": 553 }, { "epoch": 0.299075511168095, "grad_norm": 0.19952175473846084, "learning_rate": 1.59999074688298e-05, "log_odds_chosen": 0.057861328125, "log_odds_ratio": -0.66650390625, "logits/chosen": 0.3584442138671875, "logits/rejected": 0.5381240844726562, "logps/chosen": -0.501953125, "logps/rejected": -0.51953125, "loss": 9.0391, "nll_loss": 0.511474609375, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0501861572265625, "rewards/margins": 0.0017852783203125, "rewards/rejected": -0.051971435546875, "step": 554 }, { "epoch": 0.2996153586611782, "grad_norm": 0.17664284721306495, "learning_rate": 1.5986261775733264e-05, "log_odds_chosen": 0.10076904296875, "log_odds_ratio": -0.64599609375, "logits/chosen": 0.17264175415039062, "logits/rejected": 0.23566055297851562, "logps/chosen": -0.48358154296875, "logps/rejected": -0.5126953125, "loss": 9.1079, "nll_loss": 0.490234375, "rewards/accuracies": 0.75, "rewards/chosen": -0.04834747314453125, "rewards/margins": 0.0029296875, "rewards/rejected": -0.05127716064453125, "step": 555 }, { "epoch": 0.30015520615426144, "grad_norm": 0.17921249335525205, "learning_rate": 1.597259868842812e-05, "log_odds_chosen": 0.0379638671875, "log_odds_ratio": -0.677978515625, "logits/chosen": -0.0042858123779296875, "logits/rejected": 0.1450653076171875, "logps/chosen": -0.47705078125, "logps/rejected": -0.4815673828125, "loss": 8.7471, "nll_loss": 0.4794921875, "rewards/accuracies": 0.75, "rewards/chosen": -0.04769134521484375, "rewards/margins": 0.00045013427734375, "rewards/rejected": -0.0481414794921875, "step": 556 }, { "epoch": 0.3006950536473446, "grad_norm": 0.1784350900326259, "learning_rate": 1.5958918246615042e-05, "log_odds_chosen": 0.1234130859375, "log_odds_ratio": -0.63720703125, "logits/chosen": 0.25156402587890625, "logits/rejected": 0.28746795654296875, "logps/chosen": -0.47711181640625, "logps/rejected": -0.5177001953125, "loss": 9.2671, "nll_loss": 0.4781494140625, "rewards/accuracies": 0.6875, "rewards/chosen": -0.04767608642578125, "rewards/margins": 0.00409698486328125, "rewards/rejected": -0.0517730712890625, "step": 557 }, { "epoch": 0.30123490114042784, "grad_norm": 0.18856240932855545, "learning_rate": 1.594522049004512e-05, "log_odds_chosen": 0.0654296875, "log_odds_ratio": -0.6630859375, "logits/chosen": 0.387237548828125, "logits/rejected": 0.4799041748046875, "logps/chosen": -0.487548828125, "logps/rejected": -0.50927734375, "loss": 8.9019, "nll_loss": 0.491455078125, "rewards/accuracies": 0.6875, "rewards/chosen": -0.04875946044921875, "rewards/margins": 0.00214385986328125, "rewards/rejected": -0.0509033203125, "step": 558 }, { "epoch": 0.30177474863351106, "grad_norm": 0.19976423441291777, "learning_rate": 1.5931505458519763e-05, "log_odds_chosen": 0.069091796875, "log_odds_ratio": -0.6622314453125, "logits/chosen": 0.4991912841796875, "logits/rejected": 0.65155029296875, "logps/chosen": -0.51220703125, "logps/rejected": -0.529541015625, "loss": 9.1814, "nll_loss": 0.5211181640625, "rewards/accuracies": 0.5625, "rewards/chosen": -0.05121612548828125, "rewards/margins": 0.00173187255859375, "rewards/rejected": -0.052947998046875, "step": 559 }, { "epoch": 0.3023145961265942, "grad_norm": 0.21937907600061707, "learning_rate": 1.5917773191890572e-05, "log_odds_chosen": 0.0238037109375, "log_odds_ratio": -0.68359375, "logits/chosen": 0.746795654296875, "logits/rejected": 0.8754425048828125, "logps/chosen": -0.541259765625, "logps/rejected": -0.5484619140625, "loss": 9.4692, "nll_loss": 0.552001953125, "rewards/accuracies": 0.625, "rewards/chosen": -0.0541534423828125, "rewards/margins": 0.00067138671875, "rewards/rejected": -0.0548248291015625, "step": 560 }, { "epoch": 0.30285444361967745, "grad_norm": 0.18856596071726547, "learning_rate": 1.5904023730059227e-05, "log_odds_chosen": 0.0819091796875, "log_odds_ratio": -0.6552734375, "logits/chosen": 0.73651123046875, "logits/rejected": 0.8087387084960938, "logps/chosen": -0.5257568359375, "logps/rejected": -0.552001953125, "loss": 9.5845, "nll_loss": 0.563232421875, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0525970458984375, "rewards/margins": 0.0025787353515625, "rewards/rejected": -0.05517578125, "step": 561 }, { "epoch": 0.3033942911127607, "grad_norm": 0.21750047840385542, "learning_rate": 1.5890257112977375e-05, "log_odds_chosen": 0.07568359375, "log_odds_ratio": -0.6595458984375, "logits/chosen": 0.5557708740234375, "logits/rejected": 0.62689208984375, "logps/chosen": -0.6129150390625, "logps/rejected": -0.635009765625, "loss": 10.0195, "nll_loss": 0.61810302734375, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0613250732421875, "rewards/margins": 0.0021514892578125, "rewards/rejected": -0.0634765625, "step": 562 }, { "epoch": 0.30393413860584384, "grad_norm": 0.1993865001277047, "learning_rate": 1.5876473380646512e-05, "log_odds_chosen": -0.0113525390625, "log_odds_ratio": -0.70166015625, "logits/chosen": 0.588134765625, "logits/rejected": 0.65411376953125, "logps/chosen": -0.5357666015625, "logps/rejected": -0.526611328125, "loss": 9.0684, "nll_loss": 0.5394287109375, "rewards/accuracies": 0.375, "rewards/chosen": -0.053558349609375, "rewards/margins": -0.0008697509765625, "rewards/rejected": -0.0526885986328125, "step": 563 }, { "epoch": 0.30447398609892706, "grad_norm": 0.20733112848770807, "learning_rate": 1.5862672573117857e-05, "log_odds_chosen": 0.067138671875, "log_odds_ratio": -0.6715087890625, "logits/chosen": 0.29962158203125, "logits/rejected": 0.3252716064453125, "logps/chosen": -0.58013916015625, "logps/rejected": -0.56689453125, "loss": 9.5247, "nll_loss": 0.58892822265625, "rewards/accuracies": 0.5625, "rewards/chosen": -0.05800628662109375, "rewards/margins": -0.00135040283203125, "rewards/rejected": -0.0566558837890625, "step": 564 }, { "epoch": 0.30501383359201023, "grad_norm": 0.19078965934187067, "learning_rate": 1.584885473049225e-05, "log_odds_chosen": 0.0152587890625, "log_odds_ratio": -0.68603515625, "logits/chosen": 0.8167724609375, "logits/rejected": 0.891357421875, "logps/chosen": -0.5467529296875, "logps/rejected": -0.551513671875, "loss": 9.9395, "nll_loss": 0.5543212890625, "rewards/accuracies": 0.5625, "rewards/chosen": -0.05467987060546875, "rewards/margins": 0.00046539306640625, "rewards/rejected": -0.055145263671875, "step": 565 }, { "epoch": 0.30555368108509345, "grad_norm": 0.18792575758948818, "learning_rate": 1.5835019892920038e-05, "log_odds_chosen": 0.0579833984375, "log_odds_ratio": -0.6669921875, "logits/chosen": 0.3322601318359375, "logits/rejected": 0.49591064453125, "logps/chosen": -0.460205078125, "logps/rejected": -0.4737548828125, "loss": 9.1289, "nll_loss": 0.4803466796875, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0460662841796875, "rewards/margins": 0.0013275146484375, "rewards/rejected": -0.047393798828125, "step": 566 }, { "epoch": 0.3060935285781767, "grad_norm": 0.21048430281487157, "learning_rate": 1.582116810060093e-05, "log_odds_chosen": 0.0782470703125, "log_odds_ratio": -0.65771484375, "logits/chosen": 0.4931144714355469, "logits/rejected": 0.6379241943359375, "logps/chosen": -0.5225830078125, "logps/rejected": -0.5439453125, "loss": 9.6499, "nll_loss": 0.53369140625, "rewards/accuracies": 0.5, "rewards/chosen": -0.05226898193359375, "rewards/margins": 0.00211334228515625, "rewards/rejected": -0.05438232421875, "step": 567 }, { "epoch": 0.30663337607125984, "grad_norm": 0.19409000021098538, "learning_rate": 1.580729939378392e-05, "log_odds_chosen": 0.0819091796875, "log_odds_ratio": -0.6552734375, "logits/chosen": 0.393463134765625, "logits/rejected": 0.5066070556640625, "logps/chosen": -0.509033203125, "logps/rejected": -0.53125, "loss": 9.2437, "nll_loss": 0.5137939453125, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0508880615234375, "rewards/margins": 0.002227783203125, "rewards/rejected": -0.0531158447265625, "step": 568 }, { "epoch": 0.30717322356434307, "grad_norm": 0.1949885856534611, "learning_rate": 1.5793413812767137e-05, "log_odds_chosen": 0.0902099609375, "log_odds_ratio": -0.652587890625, "logits/chosen": 0.1347188949584961, "logits/rejected": 0.2401113510131836, "logps/chosen": -0.4305419921875, "logps/rejected": -0.457763671875, "loss": 8.5483, "nll_loss": 0.4320068359375, "rewards/accuracies": 0.75, "rewards/chosen": -0.0430755615234375, "rewards/margins": 0.002685546875, "rewards/rejected": -0.0457611083984375, "step": 569 }, { "epoch": 0.3077130710574263, "grad_norm": 0.18441533565499815, "learning_rate": 1.5779511397897752e-05, "log_odds_chosen": 0.086669921875, "log_odds_ratio": -0.65478515625, "logits/chosen": 0.42352294921875, "logits/rejected": 0.5283479690551758, "logps/chosen": -0.48565673828125, "logps/rejected": -0.50634765625, "loss": 9.3306, "nll_loss": 0.49444580078125, "rewards/accuracies": 0.5, "rewards/chosen": -0.04856109619140625, "rewards/margins": 0.0021209716796875, "rewards/rejected": -0.05068206787109375, "step": 570 }, { "epoch": 0.30825291855050946, "grad_norm": 0.20779842890826877, "learning_rate": 1.5765592189571837e-05, "log_odds_chosen": 0.100341796875, "log_odds_ratio": -0.652587890625, "logits/chosen": 0.310943603515625, "logits/rejected": 0.40476226806640625, "logps/chosen": -0.556884765625, "logps/rejected": -0.565185546875, "loss": 9.4917, "nll_loss": 0.55615234375, "rewards/accuracies": 0.625, "rewards/chosen": -0.0556793212890625, "rewards/margins": 0.0008392333984375, "rewards/rejected": -0.0565185546875, "step": 571 }, { "epoch": 0.3087927660435927, "grad_norm": 0.25136380346875664, "learning_rate": 1.575165622823427e-05, "log_odds_chosen": 0.0511474609375, "log_odds_ratio": -0.673828125, "logits/chosen": 0.228179931640625, "logits/rejected": 0.24613189697265625, "logps/chosen": -0.436279296875, "logps/rejected": -0.4566650390625, "loss": 8.8018, "nll_loss": 0.4381103515625, "rewards/accuracies": 0.6875, "rewards/chosen": -0.04363250732421875, "rewards/margins": 0.0020751953125, "rewards/rejected": -0.04570770263671875, "step": 572 }, { "epoch": 0.3093326135366759, "grad_norm": 0.21635257300719674, "learning_rate": 1.573770355437861e-05, "log_odds_chosen": -0.01068115234375, "log_odds_ratio": -0.703125, "logits/chosen": 0.182159423828125, "logits/rejected": 0.250640869140625, "logps/chosen": -0.5924072265625, "logps/rejected": -0.5709228515625, "loss": 9.9092, "nll_loss": 0.5992431640625, "rewards/accuracies": 0.625, "rewards/chosen": -0.059234619140625, "rewards/margins": -0.002166748046875, "rewards/rejected": -0.05706787109375, "step": 573 }, { "epoch": 0.30987246102975907, "grad_norm": 0.20846918125999458, "learning_rate": 1.5723734208546972e-05, "log_odds_chosen": 0.14215087890625, "log_odds_ratio": -0.6336669921875, "logits/chosen": 0.23574066162109375, "logits/rejected": 0.335479736328125, "logps/chosen": -0.579833984375, "logps/rejected": -0.6025390625, "loss": 9.374, "nll_loss": 0.5863037109375, "rewards/accuracies": 0.625, "rewards/chosen": -0.05794525146484375, "rewards/margins": 0.00229644775390625, "rewards/rejected": -0.06024169921875, "step": 574 }, { "epoch": 0.3104123085228423, "grad_norm": 0.19304698999593906, "learning_rate": 1.5709748231329913e-05, "log_odds_chosen": 0.07666015625, "log_odds_ratio": -0.65966796875, "logits/chosen": 0.30060577392578125, "logits/rejected": 0.3789215087890625, "logps/chosen": -0.5177001953125, "logps/rejected": -0.53271484375, "loss": 8.9058, "nll_loss": 0.5299072265625, "rewards/accuracies": 0.6875, "rewards/chosen": -0.05176544189453125, "rewards/margins": 0.00150299072265625, "rewards/rejected": -0.0532684326171875, "step": 575 }, { "epoch": 0.3109521560159255, "grad_norm": 0.1969758534567216, "learning_rate": 1.5695745663366317e-05, "log_odds_chosen": 0.05078125, "log_odds_ratio": -0.672119140625, "logits/chosen": 0.3585662841796875, "logits/rejected": 0.48981475830078125, "logps/chosen": -0.58966064453125, "logps/rejected": -0.5877685546875, "loss": 9.3174, "nll_loss": 0.59588623046875, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0589752197265625, "rewards/margins": -0.0002288818359375, "rewards/rejected": -0.058746337890625, "step": 576 }, { "epoch": 0.3114920035090087, "grad_norm": 0.18999016265495092, "learning_rate": 1.5681726545343278e-05, "log_odds_chosen": 0.12872314453125, "log_odds_ratio": -0.635498046875, "logits/chosen": -0.02364349365234375, "logits/rejected": 0.03183460235595703, "logps/chosen": -0.36737060546875, "logps/rejected": -0.40447998046875, "loss": 8.6909, "nll_loss": 0.37115478515625, "rewards/accuracies": 0.8125, "rewards/chosen": -0.036712646484375, "rewards/margins": 0.00372314453125, "rewards/rejected": -0.040435791015625, "step": 577 }, { "epoch": 0.3120318510020919, "grad_norm": 0.19696471072656033, "learning_rate": 1.566769091799598e-05, "log_odds_chosen": 0.000732421875, "log_odds_ratio": -0.69482421875, "logits/chosen": 0.271392822265625, "logits/rejected": 0.39086151123046875, "logps/chosen": -0.460205078125, "logps/rejected": -0.461181640625, "loss": 8.9692, "nll_loss": 0.466552734375, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0460357666015625, "rewards/margins": 6.866455078125e-05, "rewards/rejected": -0.04610443115234375, "step": 578 }, { "epoch": 0.31257169849517513, "grad_norm": 0.20687004594364422, "learning_rate": 1.5653638822107572e-05, "log_odds_chosen": 0.06201171875, "log_odds_ratio": -0.66455078125, "logits/chosen": 0.24859619140625, "logits/rejected": 0.335235595703125, "logps/chosen": -0.4515380859375, "logps/rejected": -0.4722900390625, "loss": 9.6074, "nll_loss": 0.4539794921875, "rewards/accuracies": 0.75, "rewards/chosen": -0.04514312744140625, "rewards/margins": 0.00205230712890625, "rewards/rejected": -0.0471954345703125, "step": 579 }, { "epoch": 0.3131115459882583, "grad_norm": 0.2002116789642058, "learning_rate": 1.5639570298509067e-05, "log_odds_chosen": 0.0848388671875, "log_odds_ratio": -0.653564453125, "logits/chosen": 0.2458648681640625, "logits/rejected": 0.314056396484375, "logps/chosen": -0.47772216796875, "logps/rejected": -0.5018310546875, "loss": 9.4302, "nll_loss": 0.49371337890625, "rewards/accuracies": 0.75, "rewards/chosen": -0.04775238037109375, "rewards/margins": 0.002410888671875, "rewards/rejected": -0.05016326904296875, "step": 580 }, { "epoch": 0.3136513934813415, "grad_norm": 0.219002286761628, "learning_rate": 1.5625485388079197e-05, "log_odds_chosen": 0.0943603515625, "log_odds_ratio": -0.64892578125, "logits/chosen": 0.103302001953125, "logits/rejected": 0.17211151123046875, "logps/chosen": -0.3896484375, "logps/rejected": -0.4202880859375, "loss": 9.6543, "nll_loss": 0.394287109375, "rewards/accuracies": 0.875, "rewards/chosen": -0.03894805908203125, "rewards/margins": 0.00308990478515625, "rewards/rejected": -0.0420379638671875, "step": 581 }, { "epoch": 0.31419124097442475, "grad_norm": 0.19835827446760687, "learning_rate": 1.5611384131744317e-05, "log_odds_chosen": 0.0433349609375, "log_odds_ratio": -0.6748046875, "logits/chosen": 0.3829460144042969, "logits/rejected": 0.4387969970703125, "logps/chosen": -0.4617919921875, "logps/rejected": -0.473876953125, "loss": 9.5312, "nll_loss": 0.470458984375, "rewards/accuracies": 0.5625, "rewards/chosen": -0.046173095703125, "rewards/margins": 0.0011749267578125, "rewards/rejected": -0.0473480224609375, "step": 582 }, { "epoch": 0.3147310884675079, "grad_norm": 0.17553665851174127, "learning_rate": 1.5597266570478283e-05, "log_odds_chosen": 0.01025390625, "log_odds_ratio": -0.6884765625, "logits/chosen": 0.5839996337890625, "logits/rejected": 0.6658782958984375, "logps/chosen": -0.49609375, "logps/rejected": -0.494140625, "loss": 9.0513, "nll_loss": 0.50927734375, "rewards/accuracies": 0.5625, "rewards/chosen": -0.04962158203125, "rewards/margins": -0.00017547607421875, "rewards/rejected": -0.04944610595703125, "step": 583 }, { "epoch": 0.31527093596059114, "grad_norm": 0.1771200444536466, "learning_rate": 1.5583132745302316e-05, "log_odds_chosen": 0.035888671875, "log_odds_ratio": -0.6767578125, "logits/chosen": 0.27877044677734375, "logits/rejected": 0.38803672790527344, "logps/chosen": -0.5048828125, "logps/rejected": -0.509033203125, "loss": 8.7266, "nll_loss": 0.51318359375, "rewards/accuracies": 0.5, "rewards/chosen": -0.050445556640625, "rewards/margins": 0.0004425048828125, "rewards/rejected": -0.0508880615234375, "step": 584 }, { "epoch": 0.31581078345367436, "grad_norm": 0.20622315923078702, "learning_rate": 1.5568982697284905e-05, "log_odds_chosen": 0.02197265625, "log_odds_ratio": -0.6884765625, "logits/chosen": 0.4137296676635742, "logits/rejected": 0.5669403076171875, "logps/chosen": -0.62200927734375, "logps/rejected": -0.6004638671875, "loss": 10.4863, "nll_loss": 0.62603759765625, "rewards/accuracies": 0.6875, "rewards/chosen": -0.06219482421875, "rewards/margins": -0.002197265625, "rewards/rejected": -0.05999755859375, "step": 585 }, { "epoch": 0.3163506309467575, "grad_norm": 0.17646063060449962, "learning_rate": 1.555481646754167e-05, "log_odds_chosen": 0.0592041015625, "log_odds_ratio": -0.666748046875, "logits/chosen": 0.4006681442260742, "logits/rejected": 0.4408531188964844, "logps/chosen": -0.4703369140625, "logps/rejected": -0.4853515625, "loss": 8.8359, "nll_loss": 0.47998046875, "rewards/accuracies": 0.5, "rewards/chosen": -0.0469970703125, "rewards/margins": 0.001556396484375, "rewards/rejected": -0.048553466796875, "step": 586 }, { "epoch": 0.31689047843984075, "grad_norm": 0.20365374872660763, "learning_rate": 1.554063409723526e-05, "log_odds_chosen": 0.052734375, "log_odds_ratio": -0.669921875, "logits/chosen": 0.39178466796875, "logits/rejected": 0.48047637939453125, "logps/chosen": -0.4781494140625, "logps/rejected": -0.4901123046875, "loss": 9.6318, "nll_loss": 0.482421875, "rewards/accuracies": 0.5625, "rewards/chosen": -0.047821044921875, "rewards/margins": 0.00121307373046875, "rewards/rejected": -0.04903411865234375, "step": 587 }, { "epoch": 0.317430325932924, "grad_norm": 0.18468712789263766, "learning_rate": 1.5526435627575208e-05, "log_odds_chosen": 0.0521240234375, "log_odds_ratio": -0.669189453125, "logits/chosen": 0.23681640625, "logits/rejected": 0.3271293640136719, "logps/chosen": -0.44158935546875, "logps/rejected": -0.45635986328125, "loss": 8.5903, "nll_loss": 0.45928955078125, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0441436767578125, "rewards/margins": 0.001495361328125, "rewards/rejected": -0.0456390380859375, "step": 588 }, { "epoch": 0.31797017342600714, "grad_norm": 0.2078890122091466, "learning_rate": 1.5512221099817847e-05, "log_odds_chosen": 0.07080078125, "log_odds_ratio": -0.661865234375, "logits/chosen": 0.22308349609375, "logits/rejected": 0.4066925048828125, "logps/chosen": -0.44696044921875, "logps/rejected": -0.46600341796875, "loss": 10.0879, "nll_loss": 0.46417236328125, "rewards/accuracies": 0.5625, "rewards/chosen": -0.04471588134765625, "rewards/margins": 0.0019073486328125, "rewards/rejected": -0.04662322998046875, "step": 589 }, { "epoch": 0.31851002091909036, "grad_norm": 0.17816341543088, "learning_rate": 1.5497990555266152e-05, "log_odds_chosen": 0.0457763671875, "log_odds_ratio": -0.672119140625, "logits/chosen": 0.3589668273925781, "logits/rejected": 0.470489501953125, "logps/chosen": -0.4468994140625, "logps/rejected": -0.457763671875, "loss": 8.8633, "nll_loss": 0.4561767578125, "rewards/accuracies": 0.5625, "rewards/chosen": -0.04468536376953125, "rewards/margins": 0.0010986328125, "rewards/rejected": -0.04578399658203125, "step": 590 }, { "epoch": 0.3190498684121736, "grad_norm": 0.1955152291957936, "learning_rate": 1.5483744035269648e-05, "log_odds_chosen": 0.0406494140625, "log_odds_ratio": -0.67431640625, "logits/chosen": 0.15837860107421875, "logits/rejected": 0.2210540771484375, "logps/chosen": -0.395263671875, "logps/rejected": -0.4056396484375, "loss": 8.6323, "nll_loss": 0.406005859375, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0395355224609375, "rewards/margins": 0.00103759765625, "rewards/rejected": -0.0405731201171875, "step": 591 }, { "epoch": 0.31958971590525675, "grad_norm": 0.19154226023487847, "learning_rate": 1.5469481581224274e-05, "log_odds_chosen": 0.0843505859375, "log_odds_ratio": -0.654541015625, "logits/chosen": 0.21269607543945312, "logits/rejected": 0.2927360534667969, "logps/chosen": -0.41900634765625, "logps/rejected": -0.44287109375, "loss": 8.9863, "nll_loss": 0.4261474609375, "rewards/accuracies": 0.75, "rewards/chosen": -0.04186248779296875, "rewards/margins": 0.00243377685546875, "rewards/rejected": -0.0442962646484375, "step": 592 }, { "epoch": 0.32012956339834, "grad_norm": 0.1845220167330458, "learning_rate": 1.5455203234572265e-05, "log_odds_chosen": 0.11236572265625, "log_odds_ratio": -0.639404296875, "logits/chosen": 0.08260726928710938, "logits/rejected": 0.1087799072265625, "logps/chosen": -0.366943359375, "logps/rejected": -0.39801025390625, "loss": 8.8257, "nll_loss": 0.36993408203125, "rewards/accuracies": 0.875, "rewards/chosen": -0.0366973876953125, "rewards/margins": 0.00310516357421875, "rewards/rejected": -0.03980255126953125, "step": 593 }, { "epoch": 0.32066941089142315, "grad_norm": 0.19011451279183633, "learning_rate": 1.544090903680205e-05, "log_odds_chosen": 0.0355224609375, "log_odds_ratio": -0.677734375, "logits/chosen": 0.36932373046875, "logits/rejected": 0.4107818603515625, "logps/chosen": -0.465576171875, "logps/rejected": -0.4715576171875, "loss": 9.4961, "nll_loss": 0.4693603515625, "rewards/accuracies": 0.5625, "rewards/chosen": -0.046539306640625, "rewards/margins": 0.00067138671875, "rewards/rejected": -0.047210693359375, "step": 594 }, { "epoch": 0.32120925838450637, "grad_norm": 0.18827766122211953, "learning_rate": 1.5426599029448103e-05, "log_odds_chosen": 0.0487060546875, "log_odds_ratio": -0.6719970703125, "logits/chosen": 0.5572509765625, "logits/rejected": 0.663482666015625, "logps/chosen": -0.485107421875, "logps/rejected": -0.4937744140625, "loss": 8.8623, "nll_loss": 0.49578857421875, "rewards/accuracies": 0.625, "rewards/chosen": -0.04852294921875, "rewards/margins": 0.0008392333984375, "rewards/rejected": -0.0493621826171875, "step": 595 }, { "epoch": 0.3217491058775896, "grad_norm": 0.18629952188463753, "learning_rate": 1.541227325409084e-05, "log_odds_chosen": 0.0335693359375, "log_odds_ratio": -0.681640625, "logits/chosen": 0.6077880859375, "logits/rejected": 0.727783203125, "logps/chosen": -0.5853271484375, "logps/rejected": -0.603515625, "loss": 8.9121, "nll_loss": 0.6060791015625, "rewards/accuracies": 0.5, "rewards/chosen": -0.0585174560546875, "rewards/margins": 0.0018310546875, "rewards/rejected": -0.0603485107421875, "step": 596 }, { "epoch": 0.32228895337067276, "grad_norm": 0.17995400831587982, "learning_rate": 1.5397931752356492e-05, "log_odds_chosen": 0.013916015625, "log_odds_ratio": -0.686767578125, "logits/chosen": 0.470550537109375, "logits/rejected": 0.6363258361816406, "logps/chosen": -0.542236328125, "logps/rejected": -0.54541015625, "loss": 8.8728, "nll_loss": 0.5469970703125, "rewards/accuracies": 0.625, "rewards/chosen": -0.05420684814453125, "rewards/margins": 0.00037384033203125, "rewards/rejected": -0.0545806884765625, "step": 597 }, { "epoch": 0.322828800863756, "grad_norm": 0.16900538185375544, "learning_rate": 1.538357456591699e-05, "log_odds_chosen": 0.081298828125, "log_odds_ratio": -0.6552734375, "logits/chosen": 0.4727325439453125, "logits/rejected": 0.5369968414306641, "logps/chosen": -0.47589111328125, "logps/rejected": -0.499755859375, "loss": 8.2046, "nll_loss": 0.48333740234375, "rewards/accuracies": 0.625, "rewards/chosen": -0.04758453369140625, "rewards/margins": 0.00240325927734375, "rewards/rejected": -0.04998779296875, "step": 598 }, { "epoch": 0.3233686483568392, "grad_norm": 0.18287234631440458, "learning_rate": 1.536920173648984e-05, "log_odds_chosen": 0.0269775390625, "log_odds_ratio": -0.681396484375, "logits/chosen": 0.41705322265625, "logits/rejected": 0.5869140625, "logps/chosen": -0.4874267578125, "logps/rejected": -0.492919921875, "loss": 9.3359, "nll_loss": 0.4947509765625, "rewards/accuracies": 0.5625, "rewards/chosen": -0.048736572265625, "rewards/margins": 0.00054168701171875, "rewards/rejected": -0.04927825927734375, "step": 599 }, { "epoch": 0.3239084958499224, "grad_norm": 0.19856660208481694, "learning_rate": 1.5354813305837993e-05, "log_odds_chosen": 0.040283203125, "log_odds_ratio": -0.677001953125, "logits/chosen": 0.580322265625, "logits/rejected": 0.58184814453125, "logps/chosen": -0.5538330078125, "logps/rejected": -0.557861328125, "loss": 9.915, "nll_loss": 0.559326171875, "rewards/accuracies": 0.625, "rewards/chosen": -0.0554046630859375, "rewards/margins": 0.0003814697265625, "rewards/rejected": -0.0557861328125, "step": 600 }, { "epoch": 0.3244483433430056, "grad_norm": 0.19382179030083652, "learning_rate": 1.534040931576974e-05, "log_odds_chosen": 0.0419921875, "log_odds_ratio": -0.67333984375, "logits/chosen": 0.45543670654296875, "logits/rejected": 0.5796432495117188, "logps/chosen": -0.5135498046875, "logps/rejected": -0.5264892578125, "loss": 9.5405, "nll_loss": 0.523193359375, "rewards/accuracies": 0.6875, "rewards/chosen": -0.051361083984375, "rewards/margins": 0.0012664794921875, "rewards/rejected": -0.0526275634765625, "step": 601 }, { "epoch": 0.3249881908360888, "grad_norm": 0.20030369348339208, "learning_rate": 1.5325989808138582e-05, "log_odds_chosen": 0.021728515625, "log_odds_ratio": -0.685546875, "logits/chosen": 0.269195556640625, "logits/rejected": 0.40680694580078125, "logps/chosen": -0.52264404296875, "logps/rejected": -0.520263671875, "loss": 10.123, "nll_loss": 0.53143310546875, "rewards/accuracies": 0.4375, "rewards/chosen": -0.0522918701171875, "rewards/margins": -0.0002593994140625, "rewards/rejected": -0.052032470703125, "step": 602 }, { "epoch": 0.325528038329172, "grad_norm": 0.17939150128867173, "learning_rate": 1.531155482484311e-05, "log_odds_chosen": 0.103271484375, "log_odds_ratio": -0.646240234375, "logits/chosen": 0.3368644714355469, "logits/rejected": 0.400726318359375, "logps/chosen": -0.45166015625, "logps/rejected": -0.48291015625, "loss": 9.252, "nll_loss": 0.4617919921875, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0451507568359375, "rewards/margins": 0.003143310546875, "rewards/rejected": -0.0482940673828125, "step": 603 }, { "epoch": 0.3260678858222552, "grad_norm": 0.20442783211486898, "learning_rate": 1.5297104407826876e-05, "log_odds_chosen": 0.09130859375, "log_odds_ratio": -0.6513671875, "logits/chosen": 0.19122695922851562, "logits/rejected": 0.23053741455078125, "logps/chosen": -0.4453125, "logps/rejected": -0.477783203125, "loss": 9.377, "nll_loss": 0.4461669921875, "rewards/accuracies": 0.6875, "rewards/chosen": -0.044525146484375, "rewards/margins": 0.003265380859375, "rewards/rejected": -0.04779052734375, "step": 604 }, { "epoch": 0.32660773331533843, "grad_norm": 0.1796140891754971, "learning_rate": 1.528263859907829e-05, "log_odds_chosen": 0.0733642578125, "log_odds_ratio": -0.662841796875, "logits/chosen": 0.3775467872619629, "logits/rejected": 0.4030609130859375, "logps/chosen": -0.4893798828125, "logps/rejected": -0.5087890625, "loss": 9.4858, "nll_loss": 0.4898681640625, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0489654541015625, "rewards/margins": 0.00189208984375, "rewards/rejected": -0.0508575439453125, "step": 605 }, { "epoch": 0.3271475808084216, "grad_norm": 0.18540154214364285, "learning_rate": 1.526815744063047e-05, "log_odds_chosen": 0.036376953125, "log_odds_ratio": -0.677490234375, "logits/chosen": 0.4252777099609375, "logits/rejected": 0.535247802734375, "logps/chosen": -0.5447998046875, "logps/rejected": -0.55224609375, "loss": 8.9888, "nll_loss": 0.55615234375, "rewards/accuracies": 0.5625, "rewards/chosen": -0.05448150634765625, "rewards/margins": 0.000762939453125, "rewards/rejected": -0.05524444580078125, "step": 606 }, { "epoch": 0.3276874283015048, "grad_norm": 0.17871109009218172, "learning_rate": 1.5253660974561147e-05, "log_odds_chosen": 0.1103515625, "log_odds_ratio": -0.6463623046875, "logits/chosen": 0.2970706820487976, "logits/rejected": 0.3648509979248047, "logps/chosen": -0.4647216796875, "logps/rejected": -0.501220703125, "loss": 8.2212, "nll_loss": 0.47222900390625, "rewards/accuracies": 0.8125, "rewards/chosen": -0.04645538330078125, "rewards/margins": 0.00365447998046875, "rewards/rejected": -0.05010986328125, "step": 607 }, { "epoch": 0.32822727579458805, "grad_norm": 0.21723173613972283, "learning_rate": 1.5239149242992528e-05, "log_odds_chosen": 0.085693359375, "log_odds_ratio": -0.65771484375, "logits/chosen": 0.229461669921875, "logits/rejected": 0.3069915771484375, "logps/chosen": -0.6134033203125, "logps/rejected": -0.6279296875, "loss": 9.4341, "nll_loss": 0.6099853515625, "rewards/accuracies": 0.6875, "rewards/chosen": -0.061309814453125, "rewards/margins": 0.00152587890625, "rewards/rejected": -0.062835693359375, "step": 608 }, { "epoch": 0.3287671232876712, "grad_norm": 0.19873407656432915, "learning_rate": 1.5224622288091176e-05, "log_odds_chosen": 0.0982666015625, "log_odds_ratio": -0.651611328125, "logits/chosen": 0.15082550048828125, "logits/rejected": 0.3106098175048828, "logps/chosen": -0.584716796875, "logps/rejected": -0.5980224609375, "loss": 10.2344, "nll_loss": 0.5882568359375, "rewards/accuracies": 0.75, "rewards/chosen": -0.0584716796875, "rewards/margins": 0.001312255859375, "rewards/rejected": -0.059783935546875, "step": 609 }, { "epoch": 0.32930697078075444, "grad_norm": 0.17766398140420409, "learning_rate": 1.5210080152067885e-05, "log_odds_chosen": 0.098876953125, "log_odds_ratio": -0.646240234375, "logits/chosen": 0.6004180908203125, "logits/rejected": 0.6493854522705078, "logps/chosen": -0.4913330078125, "logps/rejected": -0.5260009765625, "loss": 8.9863, "nll_loss": 0.501953125, "rewards/accuracies": 0.875, "rewards/chosen": -0.04907989501953125, "rewards/margins": 0.00351715087890625, "rewards/rejected": -0.0525970458984375, "step": 610 }, { "epoch": 0.32984681827383766, "grad_norm": 0.18774750881720803, "learning_rate": 1.5195522877177568e-05, "log_odds_chosen": 0.15045166015625, "log_odds_ratio": -0.62451171875, "logits/chosen": 0.20621490478515625, "logits/rejected": 0.3030548095703125, "logps/chosen": -0.5059814453125, "logps/rejected": -0.55126953125, "loss": 9.4766, "nll_loss": 0.509521484375, "rewards/accuracies": 0.75, "rewards/chosen": -0.05057525634765625, "rewards/margins": 0.00452423095703125, "rewards/rejected": -0.0550994873046875, "step": 611 }, { "epoch": 0.33038666576692083, "grad_norm": 0.20116477561910537, "learning_rate": 1.5180950505719125e-05, "log_odds_chosen": 0.1414794921875, "log_odds_ratio": -0.628662109375, "logits/chosen": 0.517852783203125, "logits/rejected": 0.447784423828125, "logps/chosen": -0.59375, "logps/rejected": -0.657958984375, "loss": 9.9058, "nll_loss": 0.5960693359375, "rewards/accuracies": 0.8125, "rewards/chosen": -0.059356689453125, "rewards/margins": 0.006500244140625, "rewards/rejected": -0.06585693359375, "step": 612 }, { "epoch": 0.33092651326000405, "grad_norm": 0.20297432823483194, "learning_rate": 1.5166363080035313e-05, "log_odds_chosen": 0.1134033203125, "log_odds_ratio": -0.6451416015625, "logits/chosen": 0.2873659133911133, "logits/rejected": 0.40936279296875, "logps/chosen": -0.559814453125, "logps/rejected": -0.588623046875, "loss": 9.1016, "nll_loss": 0.5872802734375, "rewards/accuracies": 0.6875, "rewards/chosen": -0.05596160888671875, "rewards/margins": 0.00284576416015625, "rewards/rejected": -0.058807373046875, "step": 613 }, { "epoch": 0.3314663607530873, "grad_norm": 0.19426538190884582, "learning_rate": 1.5151760642512648e-05, "log_odds_chosen": 0.2060546875, "log_odds_ratio": -0.59765625, "logits/chosen": 0.1241607666015625, "logits/rejected": 0.1672954559326172, "logps/chosen": -0.412841796875, "logps/rejected": -0.4853515625, "loss": 9.1016, "nll_loss": 0.42138671875, "rewards/accuracies": 0.9375, "rewards/chosen": -0.0412750244140625, "rewards/margins": 0.00726318359375, "rewards/rejected": -0.0485382080078125, "step": 614 }, { "epoch": 0.33200620824617044, "grad_norm": 0.22188188212587992, "learning_rate": 1.5137143235581249e-05, "log_odds_chosen": 0.1038818359375, "log_odds_ratio": -0.65478515625, "logits/chosen": 0.10703277587890625, "logits/rejected": 0.1335897445678711, "logps/chosen": -0.5711669921875, "logps/rejected": -0.588623046875, "loss": 9.6353, "nll_loss": 0.566650390625, "rewards/accuracies": 0.625, "rewards/chosen": -0.05709075927734375, "rewards/margins": 0.00180816650390625, "rewards/rejected": -0.05889892578125, "step": 615 }, { "epoch": 0.33254605573925367, "grad_norm": 0.20165404806905948, "learning_rate": 1.512251090171474e-05, "log_odds_chosen": -0.0006103515625, "log_odds_ratio": -0.70654296875, "logits/chosen": 0.2744903564453125, "logits/rejected": 0.28455638885498047, "logps/chosen": -0.5093994140625, "logps/rejected": -0.496826171875, "loss": 8.7383, "nll_loss": 0.51483154296875, "rewards/accuracies": 0.6875, "rewards/chosen": -0.050933837890625, "rewards/margins": -0.00127410888671875, "rewards/rejected": -0.04965972900390625, "step": 616 }, { "epoch": 0.3330859032323369, "grad_norm": 0.19404932166963562, "learning_rate": 1.5107863683430121e-05, "log_odds_chosen": 0.1064453125, "log_odds_ratio": -0.6497802734375, "logits/chosen": 0.423583984375, "logits/rejected": 0.494354248046875, "logps/chosen": -0.62060546875, "logps/rejected": -0.6429443359375, "loss": 10.2598, "nll_loss": 0.6209716796875, "rewards/accuracies": 0.75, "rewards/chosen": -0.0621185302734375, "rewards/margins": 0.0021820068359375, "rewards/rejected": -0.064300537109375, "step": 617 }, { "epoch": 0.33362575072542006, "grad_norm": 0.18664473349061916, "learning_rate": 1.5093201623287631e-05, "log_odds_chosen": 0.1339111328125, "log_odds_ratio": -0.6331787109375, "logits/chosen": 0.390228271484375, "logits/rejected": 0.4163951873779297, "logps/chosen": -0.4603271484375, "logps/rejected": -0.505615234375, "loss": 9.2012, "nll_loss": 0.4638671875, "rewards/accuracies": 0.75, "rewards/chosen": -0.04602813720703125, "rewards/margins": 0.00457000732421875, "rewards/rejected": -0.05059814453125, "step": 618 }, { "epoch": 0.3341655982185033, "grad_norm": 0.2075432376372977, "learning_rate": 1.5078524763890647e-05, "log_odds_chosen": 0.13623046875, "log_odds_ratio": -0.6298828125, "logits/chosen": 0.36102294921875, "logits/rejected": 0.4209604263305664, "logps/chosen": -0.49560546875, "logps/rejected": -0.54638671875, "loss": 9.3301, "nll_loss": 0.5064697265625, "rewards/accuracies": 0.9375, "rewards/chosen": -0.04953765869140625, "rewards/margins": 0.00507354736328125, "rewards/rejected": -0.0546112060546875, "step": 619 }, { "epoch": 0.3347054457115865, "grad_norm": 0.1804685099545009, "learning_rate": 1.506383314788554e-05, "log_odds_chosen": 0.1246337890625, "log_odds_ratio": -0.647216796875, "logits/chosen": 0.26357269287109375, "logits/rejected": 0.2999114990234375, "logps/chosen": -0.45458984375, "logps/rejected": -0.4915771484375, "loss": 8.4175, "nll_loss": 0.4599609375, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0454254150390625, "rewards/margins": 0.0037384033203125, "rewards/rejected": -0.049163818359375, "step": 620 }, { "epoch": 0.33524529320466967, "grad_norm": 0.20885029898844928, "learning_rate": 1.5049126817961561e-05, "log_odds_chosen": 0.1396484375, "log_odds_ratio": -0.630615234375, "logits/chosen": 0.2408447265625, "logits/rejected": 0.26930999755859375, "logps/chosen": -0.4288330078125, "logps/rejected": -0.4755859375, "loss": 10.0908, "nll_loss": 0.4324951171875, "rewards/accuracies": 0.875, "rewards/chosen": -0.04288482666015625, "rewards/margins": 0.00464630126953125, "rewards/rejected": -0.0475311279296875, "step": 621 }, { "epoch": 0.3357851406977529, "grad_norm": 0.19422158930604252, "learning_rate": 1.5034405816850717e-05, "log_odds_chosen": 0.10699462890625, "log_odds_ratio": -0.642578125, "logits/chosen": 0.3348541259765625, "logits/rejected": 0.421173095703125, "logps/chosen": -0.44757080078125, "logps/rejected": -0.4815673828125, "loss": 9.377, "nll_loss": 0.4530029296875, "rewards/accuracies": 0.8125, "rewards/chosen": -0.0447540283203125, "rewards/margins": 0.00341033935546875, "rewards/rejected": -0.04816436767578125, "step": 622 }, { "epoch": 0.3363249881908361, "grad_norm": 0.19370030200877636, "learning_rate": 1.5019670187327642e-05, "log_odds_chosen": 0.1180419921875, "log_odds_ratio": -0.6390380859375, "logits/chosen": 0.2547187805175781, "logits/rejected": 0.2643890380859375, "logps/chosen": -0.43017578125, "logps/rejected": -0.4698486328125, "loss": 9.4141, "nll_loss": 0.4306640625, "rewards/accuracies": 0.8125, "rewards/chosen": -0.04305267333984375, "rewards/margins": 0.00391387939453125, "rewards/rejected": -0.046966552734375, "step": 623 }, { "epoch": 0.3368648356839193, "grad_norm": 0.19525213272465, "learning_rate": 1.5004919972209475e-05, "log_odds_chosen": 0.1204833984375, "log_odds_ratio": -0.635986328125, "logits/chosen": 0.3377494812011719, "logits/rejected": 0.436309814453125, "logps/chosen": -0.447265625, "logps/rejected": -0.4857177734375, "loss": 9.2383, "nll_loss": 0.4603271484375, "rewards/accuracies": 0.875, "rewards/chosen": -0.044708251953125, "rewards/margins": 0.0038604736328125, "rewards/rejected": -0.0485687255859375, "step": 624 }, { "epoch": 0.3374046831770025, "grad_norm": 0.19096761909051913, "learning_rate": 1.4990155214355744e-05, "log_odds_chosen": 0.1553955078125, "log_odds_ratio": -0.62255859375, "logits/chosen": 0.39794921875, "logits/rejected": 0.567901611328125, "logps/chosen": -0.51031494140625, "logps/rejected": -0.5574951171875, "loss": 8.8105, "nll_loss": 0.5177001953125, "rewards/accuracies": 0.8125, "rewards/chosen": -0.05101776123046875, "rewards/margins": 0.0047149658203125, "rewards/rejected": -0.05573272705078125, "step": 625 }, { "epoch": 0.3379445306700857, "grad_norm": 0.2135149412830349, "learning_rate": 1.497537595666822e-05, "log_odds_chosen": 0.09442138671875, "log_odds_ratio": -0.6505126953125, "logits/chosen": 0.15706253051757812, "logits/rejected": 0.19390869140625, "logps/chosen": -0.4630126953125, "logps/rejected": -0.48846435546875, "loss": 9.5376, "nll_loss": 0.475830078125, "rewards/accuracies": 0.625, "rewards/chosen": -0.0463104248046875, "rewards/margins": 0.0025177001953125, "rewards/rejected": -0.048828125, "step": 626 }, { "epoch": 0.3384843781631689, "grad_norm": 0.1763161830779624, "learning_rate": 1.496058224209082e-05, "log_odds_chosen": 0.068115234375, "log_odds_ratio": -0.662353515625, "logits/chosen": 0.4563484191894531, "logits/rejected": 0.5389232635498047, "logps/chosen": -0.5341796875, "logps/rejected": -0.547119140625, "loss": 8.4624, "nll_loss": 0.5347900390625, "rewards/accuracies": 0.8125, "rewards/chosen": -0.05339813232421875, "rewards/margins": 0.00131988525390625, "rewards/rejected": -0.054718017578125, "step": 627 }, { "epoch": 0.3390242256562521, "grad_norm": 0.1771710541966554, "learning_rate": 1.4945774113609459e-05, "log_odds_chosen": 0.058837890625, "log_odds_ratio": -0.66650390625, "logits/chosen": 0.5242767333984375, "logits/rejected": 0.605621337890625, "logps/chosen": -0.596923828125, "logps/rejected": -0.607421875, "loss": 9.3977, "nll_loss": 0.60888671875, "rewards/accuracies": 0.8125, "rewards/chosen": -0.0596923828125, "rewards/margins": 0.00103759765625, "rewards/rejected": -0.06072998046875, "step": 628 }, { "epoch": 0.3395640731493353, "grad_norm": 0.18323127299051728, "learning_rate": 1.4930951614251936e-05, "log_odds_chosen": -0.00341796875, "log_odds_ratio": -0.69873046875, "logits/chosen": 0.13222503662109375, "logits/rejected": 0.229705810546875, "logps/chosen": -0.59375, "logps/rejected": -0.5758056640625, "loss": 9.3257, "nll_loss": 0.5999755859375, "rewards/accuracies": 0.6875, "rewards/chosen": -0.05937957763671875, "rewards/margins": -0.0017852783203125, "rewards/rejected": -0.05759429931640625, "step": 629 }, { "epoch": 0.3401039206424185, "grad_norm": 0.18362331644222485, "learning_rate": 1.4916114787087812e-05, "log_odds_chosen": 0.1307373046875, "log_odds_ratio": -0.633544921875, "logits/chosen": 0.4109039306640625, "logits/rejected": 0.49224281311035156, "logps/chosen": -0.46087646484375, "logps/rejected": -0.4979248046875, "loss": 9.1492, "nll_loss": 0.48870849609375, "rewards/accuracies": 0.8125, "rewards/chosen": -0.04608917236328125, "rewards/margins": 0.00370025634765625, "rewards/rejected": -0.0497894287109375, "step": 630 }, { "epoch": 0.34064376813550173, "grad_norm": 0.16353054061565406, "learning_rate": 1.4901263675228275e-05, "log_odds_chosen": 0.1016845703125, "log_odds_ratio": -0.6474609375, "logits/chosen": 0.1254425048828125, "logits/rejected": 0.1955718994140625, "logps/chosen": -0.4144287109375, "logps/rejected": -0.4464111328125, "loss": 8.1411, "nll_loss": 0.4248046875, "rewards/accuracies": 0.6875, "rewards/chosen": -0.04144287109375, "rewards/margins": 0.00319671630859375, "rewards/rejected": -0.04463958740234375, "step": 631 }, { "epoch": 0.3411836156285849, "grad_norm": 0.20190090685805068, "learning_rate": 1.4886398321826021e-05, "log_odds_chosen": 0.02093505859375, "log_odds_ratio": -0.6875, "logits/chosen": 0.006317138671875, "logits/rejected": 0.16498184204101562, "logps/chosen": -0.5372314453125, "logps/rejected": -0.53997802734375, "loss": 9.1514, "nll_loss": 0.5374755859375, "rewards/accuracies": 0.625, "rewards/chosen": -0.0537261962890625, "rewards/margins": 0.00029754638671875, "rewards/rejected": -0.05402374267578125, "step": 632 }, { "epoch": 0.3417234631216681, "grad_norm": 0.19530562124292203, "learning_rate": 1.487151877007513e-05, "log_odds_chosen": 0.0333251953125, "log_odds_ratio": -0.68017578125, "logits/chosen": 0.5516510009765625, "logits/rejected": 0.6347427368164062, "logps/chosen": -0.578857421875, "logps/rejected": -0.576416015625, "loss": 9.4434, "nll_loss": 0.5906982421875, "rewards/accuracies": 0.5, "rewards/chosen": -0.05788421630859375, "rewards/margins": -0.0002899169921875, "rewards/rejected": -0.05759429931640625, "step": 633 }, { "epoch": 0.34226331061475135, "grad_norm": 0.1788929859886434, "learning_rate": 1.4856625063210933e-05, "log_odds_chosen": 0.1373291015625, "log_odds_ratio": -0.6322021484375, "logits/chosen": 0.38397216796875, "logits/rejected": 0.45440673828125, "logps/chosen": -0.46466064453125, "logps/rejected": -0.501220703125, "loss": 8.9365, "nll_loss": 0.4716796875, "rewards/accuracies": 0.8125, "rewards/chosen": -0.04648590087890625, "rewards/margins": 0.0036468505859375, "rewards/rejected": -0.05013275146484375, "step": 634 }, { "epoch": 0.3428031581078345, "grad_norm": 0.20055558020903447, "learning_rate": 1.4841717244509893e-05, "log_odds_chosen": 0.07958984375, "log_odds_ratio": -0.658447265625, "logits/chosen": 0.463104248046875, "logits/rejected": 0.5007057189941406, "logps/chosen": -0.4696044921875, "logps/rejected": -0.4876708984375, "loss": 10.1594, "nll_loss": 0.49151611328125, "rewards/accuracies": 0.625, "rewards/chosen": -0.0469512939453125, "rewards/margins": 0.0018768310546875, "rewards/rejected": -0.048828125, "step": 635 }, { "epoch": 0.34334300560091774, "grad_norm": 0.17963735155448857, "learning_rate": 1.4826795357289485e-05, "log_odds_chosen": 0.07080078125, "log_odds_ratio": -0.658447265625, "logits/chosen": 0.4367523193359375, "logits/rejected": 0.500640869140625, "logps/chosen": -0.4708251953125, "logps/rejected": -0.4954833984375, "loss": 8.615, "nll_loss": 0.4755859375, "rewards/accuracies": 0.8125, "rewards/chosen": -0.0470428466796875, "rewards/margins": 0.002471923828125, "rewards/rejected": -0.0495147705078125, "step": 636 }, { "epoch": 0.34388285309400096, "grad_norm": 0.18558122946645145, "learning_rate": 1.4811859444908053e-05, "log_odds_chosen": 0.099365234375, "log_odds_ratio": -0.646240234375, "logits/chosen": 0.3311767578125, "logits/rejected": 0.430938720703125, "logps/chosen": -0.4610595703125, "logps/rejected": -0.4952392578125, "loss": 8.9619, "nll_loss": 0.471435546875, "rewards/accuracies": 0.9375, "rewards/chosen": -0.04607391357421875, "rewards/margins": 0.00344085693359375, "rewards/rejected": -0.0495147705078125, "step": 637 }, { "epoch": 0.34442270058708413, "grad_norm": 0.1761272452341179, "learning_rate": 1.4796909550764695e-05, "log_odds_chosen": 0.06268310546875, "log_odds_ratio": -0.6630859375, "logits/chosen": 0.45943450927734375, "logits/rejected": 0.4925994873046875, "logps/chosen": -0.4560546875, "logps/rejected": -0.47698974609375, "loss": 8.8682, "nll_loss": 0.46441650390625, "rewards/accuracies": 0.8125, "rewards/chosen": -0.0456085205078125, "rewards/margins": 0.00206756591796875, "rewards/rejected": -0.04767608642578125, "step": 638 }, { "epoch": 0.34496254808016735, "grad_norm": 0.20010292989160372, "learning_rate": 1.4781945718299141e-05, "log_odds_chosen": 0.0592041015625, "log_odds_ratio": -0.6663818359375, "logits/chosen": 0.45943450927734375, "logits/rejected": 0.5855865478515625, "logps/chosen": -0.6156005859375, "logps/rejected": -0.6309814453125, "loss": 9.7212, "nll_loss": 0.624267578125, "rewards/accuracies": 0.625, "rewards/chosen": -0.061553955078125, "rewards/margins": 0.0015716552734375, "rewards/rejected": -0.0631256103515625, "step": 639 }, { "epoch": 0.3455023955732506, "grad_norm": 0.1957418333722654, "learning_rate": 1.4766967990991624e-05, "log_odds_chosen": 0.0687255859375, "log_odds_ratio": -0.6622314453125, "logits/chosen": 0.54669189453125, "logits/rejected": 0.576324462890625, "logps/chosen": -0.541259765625, "logps/rejected": -0.5819091796875, "loss": 9.6328, "nll_loss": 0.546630859375, "rewards/accuracies": 0.625, "rewards/chosen": -0.0541229248046875, "rewards/margins": 0.004058837890625, "rewards/rejected": -0.0581817626953125, "step": 640 }, { "epoch": 0.34604224306633374, "grad_norm": 0.19845567241144535, "learning_rate": 1.4751976412362732e-05, "log_odds_chosen": 0.0264892578125, "log_odds_ratio": -0.682373046875, "logits/chosen": 0.5384674072265625, "logits/rejected": 0.6127166748046875, "logps/chosen": -0.56689453125, "logps/rejected": -0.5654296875, "loss": 9.5923, "nll_loss": 0.587890625, "rewards/accuracies": 0.5625, "rewards/chosen": -0.05670166015625, "rewards/margins": -0.0001983642578125, "rewards/rejected": -0.0565032958984375, "step": 641 }, { "epoch": 0.34658209055941697, "grad_norm": 0.2052784353007714, "learning_rate": 1.4736971025973325e-05, "log_odds_chosen": 0.125, "log_odds_ratio": -0.6365966796875, "logits/chosen": 0.61199951171875, "logits/rejected": 0.6967315673828125, "logps/chosen": -0.523681640625, "logps/rejected": -0.560546875, "loss": 9.8979, "nll_loss": 0.54296875, "rewards/accuracies": 0.6875, "rewards/chosen": -0.05236053466796875, "rewards/margins": 0.00373077392578125, "rewards/rejected": -0.05609130859375, "step": 642 }, { "epoch": 0.3471219380525002, "grad_norm": 0.19902235851115566, "learning_rate": 1.472195187542437e-05, "log_odds_chosen": 0.0567626953125, "log_odds_ratio": -0.668212890625, "logits/chosen": 0.2769775390625, "logits/rejected": 0.3895282745361328, "logps/chosen": -0.544921875, "logps/rejected": -0.5557861328125, "loss": 9.1394, "nll_loss": 0.546142578125, "rewards/accuracies": 0.75, "rewards/chosen": -0.05450439453125, "rewards/margins": 0.0010986328125, "rewards/rejected": -0.05560302734375, "step": 643 }, { "epoch": 0.34766178554558336, "grad_norm": 0.19066208499405554, "learning_rate": 1.470691900435683e-05, "log_odds_chosen": 0.102294921875, "log_odds_ratio": -0.645263671875, "logits/chosen": 0.38336181640625, "logits/rejected": 0.519287109375, "logps/chosen": -0.5244140625, "logps/rejected": -0.56298828125, "loss": 9.9165, "nll_loss": 0.524658203125, "rewards/accuracies": 0.75, "rewards/chosen": -0.052459716796875, "rewards/margins": 0.0038604736328125, "rewards/rejected": -0.0563201904296875, "step": 644 }, { "epoch": 0.3482016330386666, "grad_norm": 0.1977813295771154, "learning_rate": 1.469187245645154e-05, "log_odds_chosen": 0.1148681640625, "log_odds_ratio": -0.6397705078125, "logits/chosen": 0.5164794921875, "logits/rejected": 0.602691650390625, "logps/chosen": -0.563720703125, "logps/rejected": -0.60693359375, "loss": 9.6689, "nll_loss": 0.5694580078125, "rewards/accuracies": 0.8125, "rewards/chosen": -0.05640411376953125, "rewards/margins": 0.00429534912109375, "rewards/rejected": -0.060699462890625, "step": 645 }, { "epoch": 0.3487414805317498, "grad_norm": 0.19173411134878474, "learning_rate": 1.4676812275429071e-05, "log_odds_chosen": 0.1236572265625, "log_odds_ratio": -0.637451171875, "logits/chosen": 0.1037750244140625, "logits/rejected": 0.16838836669921875, "logps/chosen": -0.479736328125, "logps/rejected": -0.5108642578125, "loss": 8.937, "nll_loss": 0.4813232421875, "rewards/accuracies": 0.875, "rewards/chosen": -0.04795074462890625, "rewards/margins": 0.00315093994140625, "rewards/rejected": -0.0511016845703125, "step": 646 }, { "epoch": 0.34928132802483297, "grad_norm": 0.19580820015833567, "learning_rate": 1.4661738505049608e-05, "log_odds_chosen": 0.004638671875, "log_odds_ratio": -0.7034912109375, "logits/chosen": 0.102783203125, "logits/rejected": 0.19185638427734375, "logps/chosen": -0.6900634765625, "logps/rejected": -0.66497802734375, "loss": 10.0547, "nll_loss": 0.6934814453125, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0690460205078125, "rewards/margins": -0.002593994140625, "rewards/rejected": -0.0664520263671875, "step": 647 }, { "epoch": 0.3498211755179162, "grad_norm": 0.18282379674210608, "learning_rate": 1.4646651189112825e-05, "log_odds_chosen": 0.0863037109375, "log_odds_ratio": -0.65380859375, "logits/chosen": 0.10636138916015625, "logits/rejected": 0.1685943603515625, "logps/chosen": -0.463623046875, "logps/rejected": -0.495849609375, "loss": 8.7563, "nll_loss": 0.4676513671875, "rewards/accuracies": 0.8125, "rewards/chosen": -0.0463714599609375, "rewards/margins": 0.00318145751953125, "rewards/rejected": -0.04955291748046875, "step": 648 }, { "epoch": 0.3503610230109994, "grad_norm": 0.1782331169907957, "learning_rate": 1.4631550371457755e-05, "log_odds_chosen": -0.005615234375, "log_odds_ratio": -0.698486328125, "logits/chosen": 0.48016357421875, "logits/rejected": 0.600433349609375, "logps/chosen": -0.6314697265625, "logps/rejected": -0.6209716796875, "loss": 9.5278, "nll_loss": 0.646484375, "rewards/accuracies": 0.5, "rewards/chosen": -0.0630950927734375, "rewards/margins": -0.0009765625, "rewards/rejected": -0.0621185302734375, "step": 649 }, { "epoch": 0.3509008705040826, "grad_norm": 0.18527757706803538, "learning_rate": 1.4616436095962661e-05, "log_odds_chosen": 0.280029296875, "log_odds_ratio": -0.567626953125, "logits/chosen": 0.0919189453125, "logits/rejected": 0.120819091796875, "logps/chosen": -0.39178466796875, "logps/rejected": -0.47625732421875, "loss": 9.0493, "nll_loss": 0.39984130859375, "rewards/accuracies": 0.9375, "rewards/chosen": -0.03917694091796875, "rewards/margins": 0.00848388671875, "rewards/rejected": -0.04766082763671875, "step": 650 }, { "epoch": 0.3514407179971658, "grad_norm": 0.2253385844111147, "learning_rate": 1.460130840654491e-05, "log_odds_chosen": 0.197998046875, "log_odds_ratio": -0.60693359375, "logits/chosen": 0.2197265625, "logits/rejected": 0.284088134765625, "logps/chosen": -0.42584228515625, "logps/rejected": -0.4862060546875, "loss": 9.604, "nll_loss": 0.42626953125, "rewards/accuracies": 0.8125, "rewards/chosen": -0.04259490966796875, "rewards/margins": 0.00598907470703125, "rewards/rejected": -0.048583984375, "step": 651 }, { "epoch": 0.35198056549024903, "grad_norm": 0.195079691240852, "learning_rate": 1.4586167347160847e-05, "log_odds_chosen": 0.1072998046875, "log_odds_ratio": -0.642333984375, "logits/chosen": 0.34439849853515625, "logits/rejected": 0.424468994140625, "logps/chosen": -0.4561767578125, "logps/rejected": -0.494384765625, "loss": 8.3843, "nll_loss": 0.4656982421875, "rewards/accuracies": 0.875, "rewards/chosen": -0.0456085205078125, "rewards/margins": 0.003875732421875, "rewards/rejected": -0.0494842529296875, "step": 652 }, { "epoch": 0.3525204129833322, "grad_norm": 0.1931947971222194, "learning_rate": 1.4571012961805662e-05, "log_odds_chosen": 0.15521240234375, "log_odds_ratio": -0.624267578125, "logits/chosen": 0.2686653137207031, "logits/rejected": 0.35269808769226074, "logps/chosen": -0.469482421875, "logps/rejected": -0.517822265625, "loss": 9.1484, "nll_loss": 0.4876708984375, "rewards/accuracies": 0.6875, "rewards/chosen": -0.04698944091796875, "rewards/margins": 0.00481414794921875, "rewards/rejected": -0.0518035888671875, "step": 653 }, { "epoch": 0.3530602604764154, "grad_norm": 0.17463721638007523, "learning_rate": 1.4555845294513276e-05, "log_odds_chosen": 0.150634765625, "log_odds_ratio": -0.6304931640625, "logits/chosen": 0.32851409912109375, "logits/rejected": 0.345611572265625, "logps/chosen": -0.42510986328125, "logps/rejected": -0.46923828125, "loss": 8.6963, "nll_loss": 0.43182373046875, "rewards/accuracies": 0.6875, "rewards/chosen": -0.04251861572265625, "rewards/margins": 0.004425048828125, "rewards/rejected": -0.04694366455078125, "step": 654 }, { "epoch": 0.3536001079694986, "grad_norm": 0.21825798465929133, "learning_rate": 1.4540664389356193e-05, "log_odds_chosen": 0.03253173828125, "log_odds_ratio": -0.681884765625, "logits/chosen": 0.4101715087890625, "logits/rejected": 0.3421173095703125, "logps/chosen": -0.51617431640625, "logps/rejected": -0.52581787109375, "loss": 9.3027, "nll_loss": 0.5220947265625, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0516204833984375, "rewards/margins": 0.00101470947265625, "rewards/rejected": -0.05263519287109375, "step": 655 }, { "epoch": 0.3541399554625818, "grad_norm": 0.20799471885877055, "learning_rate": 1.4525470290445392e-05, "log_odds_chosen": 0.1795654296875, "log_odds_ratio": -0.611328125, "logits/chosen": 0.522186279296875, "logits/rejected": 0.6014404296875, "logps/chosen": -0.50732421875, "logps/rejected": -0.5682373046875, "loss": 9.8779, "nll_loss": 0.513427734375, "rewards/accuracies": 0.8125, "rewards/chosen": -0.0507354736328125, "rewards/margins": 0.006103515625, "rewards/rejected": -0.0568389892578125, "step": 656 }, { "epoch": 0.35467980295566504, "grad_norm": 0.19763542881554558, "learning_rate": 1.4510263041930173e-05, "log_odds_chosen": 0.1702880859375, "log_odds_ratio": -0.61572265625, "logits/chosen": 0.298736572265625, "logits/rejected": 0.40789794921875, "logps/chosen": -0.491455078125, "logps/rejected": -0.5411376953125, "loss": 9.125, "nll_loss": 0.49627685546875, "rewards/accuracies": 0.9375, "rewards/chosen": -0.0491180419921875, "rewards/margins": 0.0049591064453125, "rewards/rejected": -0.0540771484375, "step": 657 }, { "epoch": 0.3552196504487482, "grad_norm": 0.19567857181949713, "learning_rate": 1.4495042687998057e-05, "log_odds_chosen": 0.1900634765625, "log_odds_ratio": -0.606689453125, "logits/chosen": 0.42856597900390625, "logits/rejected": 0.5368118286132812, "logps/chosen": -0.4635009765625, "logps/rejected": -0.52734375, "loss": 8.4434, "nll_loss": 0.4683837890625, "rewards/accuracies": 0.8125, "rewards/chosen": -0.046356201171875, "rewards/margins": 0.00634765625, "rewards/rejected": -0.052703857421875, "step": 658 }, { "epoch": 0.3557594979418314, "grad_norm": 0.1990858207032683, "learning_rate": 1.447980927287465e-05, "log_odds_chosen": 0.06182861328125, "log_odds_ratio": -0.66796875, "logits/chosen": 0.151885986328125, "logits/rejected": 0.2948150634765625, "logps/chosen": -0.5853271484375, "logps/rejected": -0.5869140625, "loss": 9.5864, "nll_loss": 0.579833984375, "rewards/accuracies": 0.6875, "rewards/chosen": -0.05852508544921875, "rewards/margins": 0.00016021728515625, "rewards/rejected": -0.058685302734375, "step": 659 }, { "epoch": 0.35629934543491465, "grad_norm": 0.2200016084504429, "learning_rate": 1.4464562840823496e-05, "log_odds_chosen": 0.164794921875, "log_odds_ratio": -0.618408203125, "logits/chosen": 0.0579376220703125, "logits/rejected": 0.1985321044921875, "logps/chosen": -0.48944091796875, "logps/rejected": -0.5341796875, "loss": 9.5605, "nll_loss": 0.5030517578125, "rewards/accuracies": 0.875, "rewards/chosen": -0.04900360107421875, "rewards/margins": 0.00441741943359375, "rewards/rejected": -0.0534210205078125, "step": 660 }, { "epoch": 0.3568391929279978, "grad_norm": 0.17495354856821962, "learning_rate": 1.444930343614597e-05, "log_odds_chosen": 0.1226806640625, "log_odds_ratio": -0.63818359375, "logits/chosen": 0.2608642578125, "logits/rejected": 0.307220458984375, "logps/chosen": -0.5421142578125, "logps/rejected": -0.5732421875, "loss": 8.8872, "nll_loss": 0.5450439453125, "rewards/accuracies": 0.9375, "rewards/chosen": -0.05419921875, "rewards/margins": 0.0031280517578125, "rewards/rejected": -0.0573272705078125, "step": 661 }, { "epoch": 0.35737904042108104, "grad_norm": 0.19657916855347285, "learning_rate": 1.4434031103181141e-05, "log_odds_chosen": 0.190185546875, "log_odds_ratio": -0.6048583984375, "logits/chosen": 0.0862884521484375, "logits/rejected": 0.1977691650390625, "logps/chosen": -0.4197998046875, "logps/rejected": -0.4793701171875, "loss": 8.7554, "nll_loss": 0.4246826171875, "rewards/accuracies": 0.875, "rewards/chosen": -0.04198455810546875, "rewards/margins": 0.0059661865234375, "rewards/rejected": -0.04795074462890625, "step": 662 }, { "epoch": 0.35791888791416426, "grad_norm": 0.19884187071696569, "learning_rate": 1.4418745886305641e-05, "log_odds_chosen": 0.1580810546875, "log_odds_ratio": -0.621826171875, "logits/chosen": 0.5800018310546875, "logits/rejected": 0.67529296875, "logps/chosen": -0.5294189453125, "logps/rejected": -0.5897216796875, "loss": 9.6167, "nll_loss": 0.53076171875, "rewards/accuracies": 0.8125, "rewards/chosen": -0.05294036865234375, "rewards/margins": 0.00601959228515625, "rewards/rejected": -0.0589599609375, "step": 663 }, { "epoch": 0.35845873540724743, "grad_norm": 0.1718753241794505, "learning_rate": 1.4403447829933541e-05, "log_odds_chosen": 0.056640625, "log_odds_ratio": -0.669189453125, "logits/chosen": 0.66827392578125, "logits/rejected": 0.7762451171875, "logps/chosen": -0.59039306640625, "logps/rejected": -0.598876953125, "loss": 9.3323, "nll_loss": 0.5953369140625, "rewards/accuracies": 0.5, "rewards/chosen": -0.059051513671875, "rewards/margins": 0.0008544921875, "rewards/rejected": -0.059906005859375, "step": 664 }, { "epoch": 0.35899858290033065, "grad_norm": 0.1953396727113865, "learning_rate": 1.4388136978516219e-05, "log_odds_chosen": 0.0980224609375, "log_odds_ratio": -0.648681640625, "logits/chosen": 0.5774688720703125, "logits/rejected": 0.712677001953125, "logps/chosen": -0.55572509765625, "logps/rejected": -0.5848388671875, "loss": 9.4419, "nll_loss": 0.5762939453125, "rewards/accuracies": 0.6875, "rewards/chosen": -0.05554962158203125, "rewards/margins": 0.00293731689453125, "rewards/rejected": -0.0584869384765625, "step": 665 }, { "epoch": 0.3595384303934139, "grad_norm": 0.20614853731879934, "learning_rate": 1.4372813376542232e-05, "log_odds_chosen": 0.174072265625, "log_odds_ratio": -0.6143798828125, "logits/chosen": 0.33477783203125, "logits/rejected": 0.4582974910736084, "logps/chosen": -0.513671875, "logps/rejected": -0.5703125, "loss": 8.875, "nll_loss": 0.5201416015625, "rewards/accuracies": 0.9375, "rewards/chosen": -0.05136871337890625, "rewards/margins": 0.00566864013671875, "rewards/rejected": -0.057037353515625, "step": 666 }, { "epoch": 0.36007827788649704, "grad_norm": 0.19205499028659237, "learning_rate": 1.4357477068537183e-05, "log_odds_chosen": 0.224609375, "log_odds_ratio": -0.594970703125, "logits/chosen": 0.287139892578125, "logits/rejected": 0.324462890625, "logps/chosen": -0.45404052734375, "logps/rejected": -0.5281982421875, "loss": 9.501, "nll_loss": 0.46746826171875, "rewards/accuracies": 1.0, "rewards/chosen": -0.04541778564453125, "rewards/margins": 0.00737762451171875, "rewards/rejected": -0.05279541015625, "step": 667 }, { "epoch": 0.36061812537958027, "grad_norm": 0.21143648023993444, "learning_rate": 1.43421280990636e-05, "log_odds_chosen": 0.2342529296875, "log_odds_ratio": -0.589111328125, "logits/chosen": 0.42458343505859375, "logits/rejected": 0.5526123046875, "logps/chosen": -0.4776611328125, "logps/rejected": -0.5599365234375, "loss": 9.5879, "nll_loss": 0.4837646484375, "rewards/accuracies": 0.9375, "rewards/chosen": -0.04778289794921875, "rewards/margins": 0.00821685791015625, "rewards/rejected": -0.055999755859375, "step": 668 }, { "epoch": 0.3611579728726635, "grad_norm": 0.19477166920064312, "learning_rate": 1.4326766512720795e-05, "log_odds_chosen": 0.1756591796875, "log_odds_ratio": -0.61767578125, "logits/chosen": 0.0656890869140625, "logits/rejected": 0.18021774291992188, "logps/chosen": -0.54638671875, "logps/rejected": -0.592529296875, "loss": 8.3589, "nll_loss": 0.5482177734375, "rewards/accuracies": 0.875, "rewards/chosen": -0.0546722412109375, "rewards/margins": 0.00453948974609375, "rewards/rejected": -0.05921173095703125, "step": 669 }, { "epoch": 0.36169782036574666, "grad_norm": 0.20201094569287792, "learning_rate": 1.4311392354144744e-05, "log_odds_chosen": 0.14453125, "log_odds_ratio": -0.62939453125, "logits/chosen": 0.5510482788085938, "logits/rejected": 0.5945857167243958, "logps/chosen": -0.5115966796875, "logps/rejected": -0.5662841796875, "loss": 9.4946, "nll_loss": 0.533447265625, "rewards/accuracies": 0.75, "rewards/chosen": -0.05115509033203125, "rewards/margins": 0.00548553466796875, "rewards/rejected": -0.056640625, "step": 670 }, { "epoch": 0.3622376678588299, "grad_norm": 0.24202958625994508, "learning_rate": 1.429600566800796e-05, "log_odds_chosen": 0.1363525390625, "log_odds_ratio": -0.636962890625, "logits/chosen": 0.5479764938354492, "logits/rejected": 0.6326065063476562, "logps/chosen": -0.6168212890625, "logps/rejected": -0.653564453125, "loss": 10.521, "nll_loss": 0.6368408203125, "rewards/accuracies": 0.875, "rewards/chosen": -0.06166839599609375, "rewards/margins": 0.00370025634765625, "rewards/rejected": -0.06536865234375, "step": 671 }, { "epoch": 0.3627775153519131, "grad_norm": 0.2180306008254348, "learning_rate": 1.4280606499019347e-05, "log_odds_chosen": 0.0989990234375, "log_odds_ratio": -0.65869140625, "logits/chosen": 0.469970703125, "logits/rejected": 0.620819091796875, "logps/chosen": -0.5914306640625, "logps/rejected": -0.6243896484375, "loss": 10.0889, "nll_loss": 0.60107421875, "rewards/accuracies": 0.8125, "rewards/chosen": -0.05918121337890625, "rewards/margins": 0.00331878662109375, "rewards/rejected": -0.06249237060546875, "step": 672 }, { "epoch": 0.36331736284499627, "grad_norm": 0.2595500223694877, "learning_rate": 1.4265194891924083e-05, "log_odds_chosen": 0.14996337890625, "log_odds_ratio": -0.629150390625, "logits/chosen": 0.5704736709594727, "logits/rejected": 0.6251068115234375, "logps/chosen": -0.50372314453125, "logps/rejected": -0.5699462890625, "loss": 9.0161, "nll_loss": 0.508056640625, "rewards/accuracies": 0.875, "rewards/chosen": -0.05037689208984375, "rewards/margins": 0.00661468505859375, "rewards/rejected": -0.0569915771484375, "step": 673 }, { "epoch": 0.3638572103380795, "grad_norm": 0.2074299111279858, "learning_rate": 1.4249770891503492e-05, "log_odds_chosen": 0.188232421875, "log_odds_ratio": -0.61474609375, "logits/chosen": 0.1923828125, "logits/rejected": 0.269287109375, "logps/chosen": -0.49066162109375, "logps/rejected": -0.5594482421875, "loss": 9.7158, "nll_loss": 0.500244140625, "rewards/accuracies": 0.875, "rewards/chosen": -0.049041748046875, "rewards/margins": 0.006927490234375, "rewards/rejected": -0.05596923828125, "step": 674 }, { "epoch": 0.3643970578311627, "grad_norm": 0.25444569764168573, "learning_rate": 1.4234334542574906e-05, "log_odds_chosen": 0.2598876953125, "log_odds_ratio": -0.5740966796875, "logits/chosen": 0.219635009765625, "logits/rejected": 0.2274169921875, "logps/chosen": -0.5081787109375, "logps/rejected": -0.60546875, "loss": 9.5269, "nll_loss": 0.510009765625, "rewards/accuracies": 1.0, "rewards/chosen": -0.05083465576171875, "rewards/margins": 0.00968170166015625, "rewards/rejected": -0.060516357421875, "step": 675 }, { "epoch": 0.3649369053242459, "grad_norm": 0.1964946131400277, "learning_rate": 1.4218885889991532e-05, "log_odds_chosen": 0.1285400390625, "log_odds_ratio": -0.6431884765625, "logits/chosen": 0.22418212890625, "logits/rejected": 0.2609701156616211, "logps/chosen": -0.644287109375, "logps/rejected": -0.6627197265625, "loss": 9.8862, "nll_loss": 0.64337158203125, "rewards/accuracies": 0.75, "rewards/chosen": -0.0643310546875, "rewards/margins": 0.0019378662109375, "rewards/rejected": -0.0662689208984375, "step": 676 }, { "epoch": 0.3654767528173291, "grad_norm": 0.19832097256789075, "learning_rate": 1.4203424978642337e-05, "log_odds_chosen": 0.1688232421875, "log_odds_ratio": -0.614501953125, "logits/chosen": 0.494110107421875, "logits/rejected": 0.5953712463378906, "logps/chosen": -0.4931640625, "logps/rejected": -0.5533447265625, "loss": 9.1313, "nll_loss": 0.5107421875, "rewards/accuracies": 0.875, "rewards/chosen": -0.0493316650390625, "rewards/margins": 0.0059967041015625, "rewards/rejected": -0.055328369140625, "step": 677 }, { "epoch": 0.36601660031041233, "grad_norm": 0.21305470945466565, "learning_rate": 1.4187951853451908e-05, "log_odds_chosen": 0.13201904296875, "log_odds_ratio": -0.6353759765625, "logits/chosen": 0.16180419921875, "logits/rejected": 0.24163055419921875, "logps/chosen": -0.46624755859375, "logps/rejected": -0.5050048828125, "loss": 9.1401, "nll_loss": 0.47186279296875, "rewards/accuracies": 0.625, "rewards/chosen": -0.04663848876953125, "rewards/margins": 0.00388336181640625, "rewards/rejected": -0.0505218505859375, "step": 678 }, { "epoch": 0.3665564478034955, "grad_norm": 0.1854229557418622, "learning_rate": 1.4172466559380315e-05, "log_odds_chosen": 0.185791015625, "log_odds_ratio": -0.6099853515625, "logits/chosen": 0.2151012420654297, "logits/rejected": 0.2906494140625, "logps/chosen": -0.42474365234375, "logps/rejected": -0.4775390625, "loss": 8.907, "nll_loss": 0.4295654296875, "rewards/accuracies": 0.875, "rewards/chosen": -0.0424652099609375, "rewards/margins": 0.005279541015625, "rewards/rejected": -0.0477447509765625, "step": 679 }, { "epoch": 0.3670962952965787, "grad_norm": 0.18873338320895158, "learning_rate": 1.4156969141422993e-05, "log_odds_chosen": 0.1314697265625, "log_odds_ratio": -0.63134765625, "logits/chosen": 0.2926177978515625, "logits/rejected": 0.4015655517578125, "logps/chosen": -0.588623046875, "logps/rejected": -0.641357421875, "loss": 9.7466, "nll_loss": 0.59375, "rewards/accuracies": 0.8125, "rewards/chosen": -0.05887603759765625, "rewards/margins": 0.00527191162109375, "rewards/rejected": -0.06414794921875, "step": 680 }, { "epoch": 0.36763614278966195, "grad_norm": 0.1930947300381155, "learning_rate": 1.4141459644610595e-05, "log_odds_chosen": 0.0172119140625, "log_odds_ratio": -0.691162109375, "logits/chosen": 0.27692604064941406, "logits/rejected": 0.30718994140625, "logps/chosen": -0.4111328125, "logps/rejected": -0.41943359375, "loss": 8.2573, "nll_loss": 0.42041015625, "rewards/accuracies": 0.75, "rewards/chosen": -0.0411224365234375, "rewards/margins": 0.0008392333984375, "rewards/rejected": -0.041961669921875, "step": 681 }, { "epoch": 0.3681759902827451, "grad_norm": 0.21196495146293537, "learning_rate": 1.4125938114008885e-05, "log_odds_chosen": 0.05859375, "log_odds_ratio": -0.66552734375, "logits/chosen": 0.81756591796875, "logits/rejected": 0.912445068359375, "logps/chosen": -0.5511474609375, "logps/rejected": -0.5709228515625, "loss": 9.457, "nll_loss": 0.5662841796875, "rewards/accuracies": 0.75, "rewards/chosen": -0.05511474609375, "rewards/margins": 0.0020294189453125, "rewards/rejected": -0.0571441650390625, "step": 682 }, { "epoch": 0.36871583777582834, "grad_norm": 0.1719616428389862, "learning_rate": 1.4110404594718587e-05, "log_odds_chosen": 0.08660888671875, "log_odds_ratio": -0.6552734375, "logits/chosen": 0.2586822509765625, "logits/rejected": 0.2950439453125, "logps/chosen": -0.44403076171875, "logps/rejected": -0.46435546875, "loss": 8.231, "nll_loss": 0.4471435546875, "rewards/accuracies": 0.625, "rewards/chosen": -0.04439544677734375, "rewards/margins": 0.00201416015625, "rewards/rejected": -0.04640960693359375, "step": 683 }, { "epoch": 0.36925568526891156, "grad_norm": 0.18933423517266987, "learning_rate": 1.4094859131875258e-05, "log_odds_chosen": 0.0670166015625, "log_odds_ratio": -0.660888671875, "logits/chosen": 0.4525604248046875, "logits/rejected": 0.5475502014160156, "logps/chosen": -0.4561767578125, "logps/rejected": -0.476318359375, "loss": 8.647, "nll_loss": 0.463623046875, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0456085205078125, "rewards/margins": 0.00201416015625, "rewards/rejected": -0.0476226806640625, "step": 684 }, { "epoch": 0.3697955327619947, "grad_norm": 0.18469461131578233, "learning_rate": 1.4079301770649163e-05, "log_odds_chosen": 0.087646484375, "log_odds_ratio": -0.65283203125, "logits/chosen": 0.2548065185546875, "logits/rejected": 0.3497772216796875, "logps/chosen": -0.5418701171875, "logps/rejected": -0.5621337890625, "loss": 8.5479, "nll_loss": 0.54443359375, "rewards/accuracies": 0.8125, "rewards/chosen": -0.05414581298828125, "rewards/margins": 0.00206756591796875, "rewards/rejected": -0.05621337890625, "step": 685 }, { "epoch": 0.37033538025507795, "grad_norm": 0.19506824504194628, "learning_rate": 1.406373255624514e-05, "log_odds_chosen": 0.058837890625, "log_odds_ratio": -0.66650390625, "logits/chosen": 0.28070068359375, "logits/rejected": 0.29615020751953125, "logps/chosen": -0.486328125, "logps/rejected": -0.4976806640625, "loss": 9.3389, "nll_loss": 0.49462890625, "rewards/accuracies": 0.75, "rewards/chosen": -0.04863739013671875, "rewards/margins": 0.00110626220703125, "rewards/rejected": -0.04974365234375, "step": 686 }, { "epoch": 0.3708752277481611, "grad_norm": 0.17662424650564915, "learning_rate": 1.4048151533902466e-05, "log_odds_chosen": -0.0013427734375, "log_odds_ratio": -0.696533203125, "logits/chosen": 0.27386474609375, "logits/rejected": 0.380706787109375, "logps/chosen": -0.4921875, "logps/rejected": -0.4815673828125, "loss": 9.1494, "nll_loss": 0.4964599609375, "rewards/accuracies": 0.5, "rewards/chosen": -0.0492095947265625, "rewards/margins": -0.0010833740234375, "rewards/rejected": -0.048126220703125, "step": 687 }, { "epoch": 0.37141507524124434, "grad_norm": 0.2003885752404502, "learning_rate": 1.403255874889473e-05, "log_odds_chosen": 0.083740234375, "log_odds_ratio": -0.654052734375, "logits/chosen": 0.28759002685546875, "logits/rejected": 0.3925933837890625, "logps/chosen": -0.43414306640625, "logps/rejected": -0.4609375, "loss": 9.2817, "nll_loss": 0.43939208984375, "rewards/accuracies": 0.75, "rewards/chosen": -0.0434112548828125, "rewards/margins": 0.0027008056640625, "rewards/rejected": -0.046112060546875, "step": 688 }, { "epoch": 0.37195492273432756, "grad_norm": 0.2030687960499768, "learning_rate": 1.4016954246529697e-05, "log_odds_chosen": 0.05224609375, "log_odds_ratio": -0.66845703125, "logits/chosen": 0.502044677734375, "logits/rejected": 0.577728271484375, "logps/chosen": -0.47601318359375, "logps/rejected": -0.492431640625, "loss": 9.3823, "nll_loss": 0.4869384765625, "rewards/accuracies": 0.6875, "rewards/chosen": -0.04763031005859375, "rewards/margins": 0.00159454345703125, "rewards/rejected": -0.049224853515625, "step": 689 }, { "epoch": 0.37249477022741073, "grad_norm": 0.18617058508794204, "learning_rate": 1.4001338072149184e-05, "log_odds_chosen": 0.0469970703125, "log_odds_ratio": -0.67236328125, "logits/chosen": 0.378387451171875, "logits/rejected": 0.5849227905273438, "logps/chosen": -0.54827880859375, "logps/rejected": -0.5528564453125, "loss": 9.4712, "nll_loss": 0.5546875, "rewards/accuracies": 0.5625, "rewards/chosen": -0.054840087890625, "rewards/margins": 0.0004730224609375, "rewards/rejected": -0.0553131103515625, "step": 690 }, { "epoch": 0.37303461772049396, "grad_norm": 0.19196692202089158, "learning_rate": 1.398571027112892e-05, "log_odds_chosen": 0.0479736328125, "log_odds_ratio": -0.67138671875, "logits/chosen": 0.259246826171875, "logits/rejected": 0.3975982666015625, "logps/chosen": -0.5147705078125, "logps/rejected": -0.526123046875, "loss": 9.1147, "nll_loss": 0.5224609375, "rewards/accuracies": 0.625, "rewards/chosen": -0.051483154296875, "rewards/margins": 0.001129150390625, "rewards/rejected": -0.0526123046875, "step": 691 }, { "epoch": 0.3735744652135772, "grad_norm": 0.19461462133689175, "learning_rate": 1.3970070888878418e-05, "log_odds_chosen": -0.0174560546875, "log_odds_ratio": -0.704833984375, "logits/chosen": 0.3411407470703125, "logits/rejected": 0.506744384765625, "logps/chosen": -0.6473388671875, "logps/rejected": -0.6181640625, "loss": 10.001, "nll_loss": 0.6558837890625, "rewards/accuracies": 0.4375, "rewards/chosen": -0.064697265625, "rewards/margins": -0.00286865234375, "rewards/rejected": -0.06182861328125, "step": 692 }, { "epoch": 0.37411431270666035, "grad_norm": 0.20392022102014573, "learning_rate": 1.395441997084084e-05, "log_odds_chosen": 0.056640625, "log_odds_ratio": -0.667724609375, "logits/chosen": 0.2648773193359375, "logits/rejected": 0.4186248779296875, "logps/chosen": -0.51123046875, "logps/rejected": -0.5174560546875, "loss": 9.7915, "nll_loss": 0.5263671875, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0511474609375, "rewards/margins": 0.0005645751953125, "rewards/rejected": -0.0517120361328125, "step": 693 }, { "epoch": 0.37465416019974357, "grad_norm": 0.2000332023710579, "learning_rate": 1.3938757562492873e-05, "log_odds_chosen": 0.0302734375, "log_odds_ratio": -0.6826171875, "logits/chosen": 0.1391143798828125, "logits/rejected": 0.30112648010253906, "logps/chosen": -0.5322265625, "logps/rejected": -0.523681640625, "loss": 9.8091, "nll_loss": 0.5439453125, "rewards/accuracies": 0.6875, "rewards/chosen": -0.05323028564453125, "rewards/margins": -0.00086212158203125, "rewards/rejected": -0.0523681640625, "step": 694 }, { "epoch": 0.3751940076928268, "grad_norm": 0.19314737821908473, "learning_rate": 1.3923083709344586e-05, "log_odds_chosen": 0.1119384765625, "log_odds_ratio": -0.6409912109375, "logits/chosen": 0.29315185546875, "logits/rejected": 0.448516845703125, "logps/chosen": -0.50970458984375, "logps/rejected": -0.5391845703125, "loss": 9.4673, "nll_loss": 0.51226806640625, "rewards/accuracies": 0.6875, "rewards/chosen": -0.05097198486328125, "rewards/margins": 0.00293731689453125, "rewards/rejected": -0.0539093017578125, "step": 695 }, { "epoch": 0.37573385518590996, "grad_norm": 0.1901989185948175, "learning_rate": 1.3907398456939306e-05, "log_odds_chosen": 0.0615234375, "log_odds_ratio": -0.66552734375, "logits/chosen": 0.19794845581054688, "logits/rejected": 0.252655029296875, "logps/chosen": -0.459228515625, "logps/rejected": -0.479248046875, "loss": 9.7681, "nll_loss": 0.4754638671875, "rewards/accuracies": 0.8125, "rewards/chosen": -0.04596710205078125, "rewards/margins": 0.00199127197265625, "rewards/rejected": -0.0479583740234375, "step": 696 }, { "epoch": 0.3762737026789932, "grad_norm": 0.19970808228097364, "learning_rate": 1.3891701850853483e-05, "log_odds_chosen": 0.0751953125, "log_odds_ratio": -0.65966796875, "logits/chosen": 0.21779251098632812, "logits/rejected": 0.2838287353515625, "logps/chosen": -0.5059814453125, "logps/rejected": -0.52783203125, "loss": 9.5864, "nll_loss": 0.51641845703125, "rewards/accuracies": 0.6875, "rewards/chosen": -0.05062103271484375, "rewards/margins": 0.00215911865234375, "rewards/rejected": -0.0527801513671875, "step": 697 }, { "epoch": 0.3768135501720764, "grad_norm": 0.20187549495694404, "learning_rate": 1.387599393669655e-05, "log_odds_chosen": -0.015380859375, "log_odds_ratio": -0.70556640625, "logits/chosen": 0.349822998046875, "logits/rejected": 0.4886474609375, "logps/chosen": -0.6492919921875, "logps/rejected": -0.616943359375, "loss": 9.9111, "nll_loss": 0.6510009765625, "rewards/accuracies": 0.625, "rewards/chosen": -0.06494140625, "rewards/margins": -0.003265380859375, "rewards/rejected": -0.061676025390625, "step": 698 }, { "epoch": 0.3773533976651596, "grad_norm": 0.1933747486267162, "learning_rate": 1.3860274760110808e-05, "log_odds_chosen": 0.0811767578125, "log_odds_ratio": -0.6561279296875, "logits/chosen": 0.20092391967773438, "logits/rejected": 0.3197479248046875, "logps/chosen": -0.4610595703125, "logps/rejected": -0.486572265625, "loss": 9.0962, "nll_loss": 0.4739990234375, "rewards/accuracies": 0.75, "rewards/chosen": -0.04610443115234375, "rewards/margins": 0.00255584716796875, "rewards/rejected": -0.0486602783203125, "step": 699 }, { "epoch": 0.3778932451582428, "grad_norm": 0.20602109503826538, "learning_rate": 1.384454436677128e-05, "log_odds_chosen": -0.034423828125, "log_odds_ratio": -0.720458984375, "logits/chosen": 0.138214111328125, "logits/rejected": 0.371826171875, "logps/chosen": -0.69921875, "logps/rejected": -0.646728515625, "loss": 10.1831, "nll_loss": 0.6954345703125, "rewards/accuracies": 0.625, "rewards/chosen": -0.06987762451171875, "rewards/margins": -0.0052337646484375, "rewards/rejected": -0.06464385986328125, "step": 700 }, { "epoch": 0.378433092651326, "grad_norm": 0.18921541398252378, "learning_rate": 1.3828802802385579e-05, "log_odds_chosen": 0.111328125, "log_odds_ratio": -0.641845703125, "logits/chosen": 0.15375518798828125, "logits/rejected": 0.15935134887695312, "logps/chosen": -0.383056640625, "logps/rejected": -0.4111328125, "loss": 8.8979, "nll_loss": 0.389892578125, "rewards/accuracies": 0.875, "rewards/chosen": -0.03826904296875, "rewards/margins": 0.00286102294921875, "rewards/rejected": -0.04113006591796875, "step": 701 }, { "epoch": 0.3789729401444092, "grad_norm": 0.18773245131399835, "learning_rate": 1.3813050112693778e-05, "log_odds_chosen": 0.1099853515625, "log_odds_ratio": -0.640869140625, "logits/chosen": 0.41302490234375, "logits/rejected": 0.5247879028320312, "logps/chosen": -0.4661865234375, "logps/rejected": -0.5029296875, "loss": 8.9287, "nll_loss": 0.4686279296875, "rewards/accuracies": 0.75, "rewards/chosen": -0.046630859375, "rewards/margins": 0.0036468505859375, "rewards/rejected": -0.0502777099609375, "step": 702 }, { "epoch": 0.3795127876374924, "grad_norm": 0.18336733770769942, "learning_rate": 1.3797286343468275e-05, "log_odds_chosen": 0.0885009765625, "log_odds_ratio": -0.6513671875, "logits/chosen": 0.492462158203125, "logits/rejected": 0.5833282470703125, "logps/chosen": -0.4766845703125, "logps/rejected": -0.5029296875, "loss": 9.9629, "nll_loss": 0.48077392578125, "rewards/accuracies": 0.8125, "rewards/chosen": -0.0476837158203125, "rewards/margins": 0.00261688232421875, "rewards/rejected": -0.05030059814453125, "step": 703 }, { "epoch": 0.38005263513057563, "grad_norm": 0.1860735994755113, "learning_rate": 1.3781511540513667e-05, "log_odds_chosen": 0.07568359375, "log_odds_ratio": -0.658203125, "logits/chosen": 0.5714187622070312, "logits/rejected": 0.590728759765625, "logps/chosen": -0.533203125, "logps/rejected": -0.5687255859375, "loss": 8.6245, "nll_loss": 0.537841796875, "rewards/accuracies": 0.75, "rewards/chosen": -0.0533599853515625, "rewards/margins": 0.0035552978515625, "rewards/rejected": -0.056915283203125, "step": 704 }, { "epoch": 0.3805924826236588, "grad_norm": 0.19036702656428534, "learning_rate": 1.376572574966661e-05, "log_odds_chosen": 0.024169921875, "log_odds_ratio": -0.683349609375, "logits/chosen": 0.58355712890625, "logits/rejected": 0.644256591796875, "logps/chosen": -0.601318359375, "logps/rejected": -0.59716796875, "loss": 9.7651, "nll_loss": 0.6129150390625, "rewards/accuracies": 0.8125, "rewards/chosen": -0.0600738525390625, "rewards/margins": -0.0003509521484375, "rewards/rejected": -0.059722900390625, "step": 705 }, { "epoch": 0.381132330116742, "grad_norm": 0.19995507260413503, "learning_rate": 1.3749929016795682e-05, "log_odds_chosen": 0.057861328125, "log_odds_ratio": -0.668701171875, "logits/chosen": 0.45477294921875, "logits/rejected": 0.53515625, "logps/chosen": -0.5364990234375, "logps/rejected": -0.5533447265625, "loss": 9.354, "nll_loss": 0.55615234375, "rewards/accuracies": 0.625, "rewards/chosen": -0.05364227294921875, "rewards/margins": 0.001708984375, "rewards/rejected": -0.05535125732421875, "step": 706 }, { "epoch": 0.38167217760982525, "grad_norm": 0.19404639974605467, "learning_rate": 1.3734121387801262e-05, "log_odds_chosen": 0.1240234375, "log_odds_ratio": -0.634765625, "logits/chosen": 0.20655059814453125, "logits/rejected": 0.325225830078125, "logps/chosen": -0.419677734375, "logps/rejected": -0.457763671875, "loss": 8.541, "nll_loss": 0.42828369140625, "rewards/accuracies": 0.9375, "rewards/chosen": -0.041961669921875, "rewards/margins": 0.0038299560546875, "rewards/rejected": -0.0457916259765625, "step": 707 }, { "epoch": 0.3822120251029084, "grad_norm": 0.1906355926649279, "learning_rate": 1.3718302908615386e-05, "log_odds_chosen": 0.126708984375, "log_odds_ratio": -0.634521484375, "logits/chosen": 0.26386260986328125, "logits/rejected": 0.38128662109375, "logps/chosen": -0.4962158203125, "logps/rejected": -0.5330810546875, "loss": 9.7129, "nll_loss": 0.5115966796875, "rewards/accuracies": 0.8125, "rewards/chosen": -0.0496063232421875, "rewards/margins": 0.00372314453125, "rewards/rejected": -0.0533294677734375, "step": 708 }, { "epoch": 0.38275187259599164, "grad_norm": 0.2086130606041885, "learning_rate": 1.3702473625201618e-05, "log_odds_chosen": 0.0537109375, "log_odds_ratio": -0.669677734375, "logits/chosen": 0.39581298828125, "logits/rejected": 0.5048294067382812, "logps/chosen": -0.5745849609375, "logps/rejected": -0.5843505859375, "loss": 9.499, "nll_loss": 0.5760498046875, "rewards/accuracies": 0.75, "rewards/chosen": -0.0574493408203125, "rewards/margins": 0.0009918212890625, "rewards/rejected": -0.058441162109375, "step": 709 }, { "epoch": 0.38329172008907486, "grad_norm": 0.1957533545007545, "learning_rate": 1.3686633583554913e-05, "log_odds_chosen": 0.0244140625, "log_odds_ratio": -0.68212890625, "logits/chosen": 0.38770484924316406, "logits/rejected": 0.4442710876464844, "logps/chosen": -0.4940185546875, "logps/rejected": -0.5025634765625, "loss": 9.1226, "nll_loss": 0.5078125, "rewards/accuracies": 0.5625, "rewards/chosen": -0.04936981201171875, "rewards/margins": 0.00087738037109375, "rewards/rejected": -0.0502471923828125, "step": 710 }, { "epoch": 0.38383156758215803, "grad_norm": 0.213700993612007, "learning_rate": 1.3670782829701495e-05, "log_odds_chosen": -0.0133056640625, "log_odds_ratio": -0.705322265625, "logits/chosen": 0.4143524169921875, "logits/rejected": 0.56512451171875, "logps/chosen": -0.599853515625, "logps/rejected": -0.5770263671875, "loss": 9.8711, "nll_loss": 0.6126708984375, "rewards/accuracies": 0.5, "rewards/chosen": -0.059967041015625, "rewards/margins": -0.0023193359375, "rewards/rejected": -0.057647705078125, "step": 711 }, { "epoch": 0.38437141507524125, "grad_norm": 0.20238142836302, "learning_rate": 1.3654921409698703e-05, "log_odds_chosen": 0.1112060546875, "log_odds_ratio": -0.6416015625, "logits/chosen": 0.3913726806640625, "logits/rejected": 0.419219970703125, "logps/chosen": -0.4718017578125, "logps/rejected": -0.505859375, "loss": 9.7744, "nll_loss": 0.474365234375, "rewards/accuracies": 0.75, "rewards/chosen": -0.04715728759765625, "rewards/margins": 0.00344085693359375, "rewards/rejected": -0.05059814453125, "step": 712 }, { "epoch": 0.3849112625683245, "grad_norm": 0.21087386690704107, "learning_rate": 1.3639049369634878e-05, "log_odds_chosen": 0.035888671875, "log_odds_ratio": -0.6822509765625, "logits/chosen": 0.1175994873046875, "logits/rejected": 0.1611480712890625, "logps/chosen": -0.5592041015625, "logps/rejected": -0.5684814453125, "loss": 10.1294, "nll_loss": 0.5693359375, "rewards/accuracies": 0.5625, "rewards/chosen": -0.05588531494140625, "rewards/margins": 0.00092315673828125, "rewards/rejected": -0.0568084716796875, "step": 713 }, { "epoch": 0.38545111006140764, "grad_norm": 0.21559119974749208, "learning_rate": 1.3623166755629211e-05, "log_odds_chosen": 0.0926513671875, "log_odds_ratio": -0.6527099609375, "logits/chosen": 0.4749011993408203, "logits/rejected": 0.4502410888671875, "logps/chosen": -0.6160888671875, "logps/rejected": -0.665283203125, "loss": 10.0879, "nll_loss": 0.6204833984375, "rewards/accuracies": 0.5625, "rewards/chosen": -0.06156158447265625, "rewards/margins": 0.00502777099609375, "rewards/rejected": -0.06658935546875, "step": 714 }, { "epoch": 0.38599095755449087, "grad_norm": 0.1871513912153957, "learning_rate": 1.3607273613831625e-05, "log_odds_chosen": 0.03662109375, "log_odds_ratio": -0.679443359375, "logits/chosen": 0.39000701904296875, "logits/rejected": 0.5760841369628906, "logps/chosen": -0.568359375, "logps/rejected": -0.5625, "loss": 9.0354, "nll_loss": 0.577880859375, "rewards/accuracies": 0.625, "rewards/chosen": -0.0568389892578125, "rewards/margins": -0.0005950927734375, "rewards/rejected": -0.056243896484375, "step": 715 }, { "epoch": 0.38653080504757403, "grad_norm": 0.17203220064102823, "learning_rate": 1.3591369990422622e-05, "log_odds_chosen": 0.072998046875, "log_odds_ratio": -0.658935546875, "logits/chosen": 0.151031494140625, "logits/rejected": 0.240966796875, "logps/chosen": -0.4110107421875, "logps/rejected": -0.4327392578125, "loss": 8.6372, "nll_loss": 0.41259765625, "rewards/accuracies": 0.75, "rewards/chosen": -0.04109954833984375, "rewards/margins": 0.00217437744140625, "rewards/rejected": -0.04327392578125, "step": 716 }, { "epoch": 0.38707065254065726, "grad_norm": 0.18843850024869255, "learning_rate": 1.3575455931613184e-05, "log_odds_chosen": 0.080810546875, "log_odds_ratio": -0.65576171875, "logits/chosen": 0.352447509765625, "logits/rejected": 0.4273567199707031, "logps/chosen": -0.4239501953125, "logps/rejected": -0.4498291015625, "loss": 9.2334, "nll_loss": 0.43603515625, "rewards/accuracies": 0.8125, "rewards/chosen": -0.04239654541015625, "rewards/margins": 0.00258636474609375, "rewards/rejected": -0.04498291015625, "step": 717 }, { "epoch": 0.3876105000337405, "grad_norm": 0.20970358491132893, "learning_rate": 1.3559531483644587e-05, "log_odds_chosen": 0.0570068359375, "log_odds_ratio": -0.669677734375, "logits/chosen": 0.20896148681640625, "logits/rejected": 0.462646484375, "logps/chosen": -0.5953369140625, "logps/rejected": -0.59912109375, "loss": 10.0505, "nll_loss": 0.6090087890625, "rewards/accuracies": 0.75, "rewards/chosen": -0.05947113037109375, "rewards/margins": 0.00041961669921875, "rewards/rejected": -0.0598907470703125, "step": 718 }, { "epoch": 0.38815034752682365, "grad_norm": 0.17724073764453327, "learning_rate": 1.3543596692788315e-05, "log_odds_chosen": 0.089111328125, "log_odds_ratio": -0.65087890625, "logits/chosen": 0.15088844299316406, "logits/rejected": 0.23577880859375, "logps/chosen": -0.4378662109375, "logps/rejected": -0.4632568359375, "loss": 9.0044, "nll_loss": 0.44091796875, "rewards/accuracies": 0.8125, "rewards/chosen": -0.04376983642578125, "rewards/margins": 0.00257110595703125, "rewards/rejected": -0.0463409423828125, "step": 719 }, { "epoch": 0.38869019501990687, "grad_norm": 0.2124565851349508, "learning_rate": 1.352765160534589e-05, "log_odds_chosen": 0.13720703125, "log_odds_ratio": -0.628662109375, "logits/chosen": 0.4382476806640625, "logits/rejected": 0.5889968872070312, "logps/chosen": -0.4404296875, "logps/rejected": -0.48291015625, "loss": 9.8643, "nll_loss": 0.462158203125, "rewards/accuracies": 0.875, "rewards/chosen": -0.04402923583984375, "rewards/margins": 0.00423431396484375, "rewards/rejected": -0.0482635498046875, "step": 720 }, { "epoch": 0.3892300425129901, "grad_norm": 0.20405001817774338, "learning_rate": 1.3511696267648766e-05, "log_odds_chosen": 0.12548828125, "log_odds_ratio": -0.6365966796875, "logits/chosen": 0.1814727783203125, "logits/rejected": 0.266387939453125, "logps/chosen": -0.40618896484375, "logps/rejected": -0.4466552734375, "loss": 8.998, "nll_loss": 0.41156005859375, "rewards/accuracies": 0.8125, "rewards/chosen": -0.040618896484375, "rewards/margins": 0.00406646728515625, "rewards/rejected": -0.04468536376953125, "step": 721 }, { "epoch": 0.38976989000607326, "grad_norm": 0.18868904954144358, "learning_rate": 1.3495730726058174e-05, "log_odds_chosen": 0.06011962890625, "log_odds_ratio": -0.666748046875, "logits/chosen": 0.365936279296875, "logits/rejected": 0.4669189453125, "logps/chosen": -0.45343017578125, "logps/rejected": -0.4732666015625, "loss": 8.9756, "nll_loss": 0.46649169921875, "rewards/accuracies": 0.5625, "rewards/chosen": -0.04535675048828125, "rewards/margins": 0.0019989013671875, "rewards/rejected": -0.04735565185546875, "step": 722 }, { "epoch": 0.3903097374991565, "grad_norm": 0.17442328956967726, "learning_rate": 1.3479755026964995e-05, "log_odds_chosen": 0.0606689453125, "log_odds_ratio": -0.66650390625, "logits/chosen": 0.1741180419921875, "logits/rejected": 0.266754150390625, "logps/chosen": -0.5413818359375, "logps/rejected": -0.54833984375, "loss": 8.9331, "nll_loss": 0.5435791015625, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0541229248046875, "rewards/margins": 0.0006866455078125, "rewards/rejected": -0.0548095703125, "step": 723 }, { "epoch": 0.3908495849922397, "grad_norm": 0.21254620060109436, "learning_rate": 1.3463769216789626e-05, "log_odds_chosen": 0.06103515625, "log_odds_ratio": -0.665771484375, "logits/chosen": 0.151824951171875, "logits/rejected": 0.2664337158203125, "logps/chosen": -0.45013427734375, "logps/rejected": -0.470947265625, "loss": 9.3647, "nll_loss": 0.45245361328125, "rewards/accuracies": 0.6875, "rewards/chosen": -0.04502105712890625, "rewards/margins": 0.0020751953125, "rewards/rejected": -0.04709625244140625, "step": 724 }, { "epoch": 0.3913894324853229, "grad_norm": 0.19005818297710755, "learning_rate": 1.344777334198184e-05, "log_odds_chosen": 0.0919189453125, "log_odds_ratio": -0.6524658203125, "logits/chosen": 0.2059783935546875, "logits/rejected": 0.34564208984375, "logps/chosen": -0.5010986328125, "logps/rejected": -0.5234375, "loss": 9.2075, "nll_loss": 0.50933837890625, "rewards/accuracies": 0.625, "rewards/chosen": -0.05010223388671875, "rewards/margins": 0.0022430419921875, "rewards/rejected": -0.05234527587890625, "step": 725 }, { "epoch": 0.3919292799784061, "grad_norm": 0.17558505261700386, "learning_rate": 1.343176744902066e-05, "log_odds_chosen": 0.0645751953125, "log_odds_ratio": -0.66650390625, "logits/chosen": 0.505218505859375, "logits/rejected": 0.6589126586914062, "logps/chosen": -0.5587158203125, "logps/rejected": -0.56689453125, "loss": 9.0195, "nll_loss": 0.571533203125, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0558624267578125, "rewards/margins": 0.00079345703125, "rewards/rejected": -0.0566558837890625, "step": 726 }, { "epoch": 0.3924691274714893, "grad_norm": 0.18535654259468107, "learning_rate": 1.3415751584414216e-05, "log_odds_chosen": 0.01611328125, "log_odds_ratio": -0.686279296875, "logits/chosen": 0.3807373046875, "logits/rejected": 0.458892822265625, "logps/chosen": -0.563232421875, "logps/rejected": -0.5689697265625, "loss": 9.3989, "nll_loss": 0.5662841796875, "rewards/accuracies": 0.5625, "rewards/chosen": -0.056304931640625, "rewards/margins": 0.00054931640625, "rewards/rejected": -0.056854248046875, "step": 727 }, { "epoch": 0.3930089749645725, "grad_norm": 0.18644250159097892, "learning_rate": 1.3399725794699608e-05, "log_odds_chosen": 0.10107421875, "log_odds_ratio": -0.6453857421875, "logits/chosen": 0.2713890075683594, "logits/rejected": 0.35790252685546875, "logps/chosen": -0.4288330078125, "logps/rejected": -0.4615478515625, "loss": 9.2554, "nll_loss": 0.4332275390625, "rewards/accuracies": 0.8125, "rewards/chosen": -0.0428924560546875, "rewards/margins": 0.003265380859375, "rewards/rejected": -0.0461578369140625, "step": 728 }, { "epoch": 0.3935488224576557, "grad_norm": 0.20022917972073845, "learning_rate": 1.3383690126442788e-05, "log_odds_chosen": 0.0577392578125, "log_odds_ratio": -0.668701171875, "logits/chosen": 0.419891357421875, "logits/rejected": 0.5634820461273193, "logps/chosen": -0.529052734375, "logps/rejected": -0.5399169921875, "loss": 9.7314, "nll_loss": 0.5364990234375, "rewards/accuracies": 0.625, "rewards/chosen": -0.05292510986328125, "rewards/margins": 0.00107574462890625, "rewards/rejected": -0.0540008544921875, "step": 729 }, { "epoch": 0.39408866995073893, "grad_norm": 0.19811459193778974, "learning_rate": 1.3367644626238398e-05, "log_odds_chosen": 0.03631591796875, "log_odds_ratio": -0.679443359375, "logits/chosen": 0.23138427734375, "logits/rejected": 0.38916015625, "logps/chosen": -0.61334228515625, "logps/rejected": -0.599853515625, "loss": 9.688, "nll_loss": 0.611083984375, "rewards/accuracies": 0.5, "rewards/chosen": -0.06137847900390625, "rewards/margins": -0.00141143798828125, "rewards/rejected": -0.059967041015625, "step": 730 }, { "epoch": 0.3946285174438221, "grad_norm": 0.19325907001097284, "learning_rate": 1.3351589340709654e-05, "log_odds_chosen": 0.0040283203125, "log_odds_ratio": -0.6953125, "logits/chosen": 0.421966552734375, "logits/rejected": 0.5297737121582031, "logps/chosen": -0.5660400390625, "logps/rejected": -0.54443359375, "loss": 9.9199, "nll_loss": 0.572509765625, "rewards/accuracies": 0.6875, "rewards/chosen": -0.056610107421875, "rewards/margins": -0.0021514892578125, "rewards/rejected": -0.0544586181640625, "step": 731 }, { "epoch": 0.3951683649369053, "grad_norm": 0.20186199206309993, "learning_rate": 1.3335524316508208e-05, "log_odds_chosen": 0.0823974609375, "log_odds_ratio": -0.6552734375, "logits/chosen": 0.03190040588378906, "logits/rejected": 0.1494903564453125, "logps/chosen": -0.49029541015625, "logps/rejected": -0.50701904296875, "loss": 9.1196, "nll_loss": 0.48828125, "rewards/accuracies": 0.6875, "rewards/chosen": -0.0490264892578125, "rewards/margins": 0.00171661376953125, "rewards/rejected": -0.05074310302734375, "step": 732 }, { "epoch": 0.39570821242998855, "grad_norm": 0.21873147599667903, "learning_rate": 1.3319449600314006e-05, "log_odds_chosen": 0.05157470703125, "log_odds_ratio": -0.669189453125, "logits/chosen": 0.5632781982421875, "logits/rejected": 0.6410675048828125, "logps/chosen": -0.47772216796875, "logps/rejected": -0.4921875, "loss": 9.7495, "nll_loss": 0.5052490234375, "rewards/accuracies": 0.5625, "rewards/chosen": -0.04779815673828125, "rewards/margins": 0.00142669677734375, "rewards/rejected": -0.049224853515625, "step": 733 }, { "epoch": 0.3962480599230717, "grad_norm": 0.20728567776834647, "learning_rate": 1.3303365238835156e-05, "log_odds_chosen": 0.0692138671875, "log_odds_ratio": -0.660400390625, "logits/chosen": 0.390350341796875, "logits/rejected": 0.586669921875, "logps/chosen": -0.481201171875, "logps/rejected": -0.5047607421875, "loss": 9.8115, "nll_loss": 0.4874267578125, "rewards/accuracies": 0.8125, "rewards/chosen": -0.0481109619140625, "rewards/margins": 0.00238800048828125, "rewards/rejected": -0.05049896240234375, "step": 734 }, { "epoch": 0.39678790741615494, "grad_norm": 0.18969514923534242, "learning_rate": 1.3287271278807794e-05, "log_odds_chosen": 0.0784912109375, "log_odds_ratio": -0.6572265625, "logits/chosen": 0.49322509765625, "logits/rejected": 0.5844879150390625, "logps/chosen": -0.44549560546875, "logps/rejected": -0.467529296875, "loss": 8.7793, "nll_loss": 0.45635986328125, "rewards/accuracies": 0.75, "rewards/chosen": -0.04453277587890625, "rewards/margins": 0.00225067138671875, "rewards/rejected": -0.046783447265625, "step": 735 }, { "epoch": 0.39732775490923816, "grad_norm": 0.1889612028780109, "learning_rate": 1.3271167766995948e-05, "log_odds_chosen": 0.0791015625, "log_odds_ratio": -0.657470703125, "logits/chosen": 0.2344207763671875, "logits/rejected": 0.26324462890625, "logps/chosen": -0.39642333984375, "logps/rejected": -0.416259765625, "loss": 9.0718, "nll_loss": 0.40155029296875, "rewards/accuracies": 0.6875, "rewards/chosen": -0.03961181640625, "rewards/margins": 0.0019989013671875, "rewards/rejected": -0.0416107177734375, "step": 736 }, { "epoch": 0.39786760240232133, "grad_norm": 0.18660483355750285, "learning_rate": 1.3255054750191395e-05, "log_odds_chosen": 0.08349609375, "log_odds_ratio": -0.655029296875, "logits/chosen": 0.287841796875, "logits/rejected": 0.415191650390625, "logps/chosen": -0.4617919921875, "logps/rejected": -0.48193359375, "loss": 10.0918, "nll_loss": 0.46807861328125, "rewards/accuracies": 0.6875, "rewards/chosen": -0.04615020751953125, "rewards/margins": 0.0020294189453125, "rewards/rejected": -0.04817962646484375, "step": 737 }, { "epoch": 0.39840744989540455, "grad_norm": 0.1766236969536795, "learning_rate": 1.323893227521353e-05, "log_odds_chosen": 0.09326171875, "log_odds_ratio": -0.650390625, "logits/chosen": -0.05043220520019531, "logits/rejected": 0.07661056518554688, "logps/chosen": -0.47857666015625, "logps/rejected": -0.4959716796875, "loss": 8.8506, "nll_loss": 0.4864501953125, "rewards/accuracies": 0.875, "rewards/chosen": -0.0478515625, "rewards/margins": 0.001739501953125, "rewards/rejected": -0.049591064453125, "step": 738 }, { "epoch": 0.3989472973884878, "grad_norm": 0.18418197024768768, "learning_rate": 1.3222800388909238e-05, "log_odds_chosen": -0.0096435546875, "log_odds_ratio": -0.702880859375, "logits/chosen": 0.267791748046875, "logits/rejected": 0.3880615234375, "logps/chosen": -0.514892578125, "logps/rejected": -0.50396728515625, "loss": 8.9927, "nll_loss": 0.5224609375, "rewards/accuracies": 0.5625, "rewards/chosen": -0.0514984130859375, "rewards/margins": -0.0010833740234375, "rewards/rejected": -0.0504150390625, "step": 739 }, { "epoch": 0.39948714488157094, "grad_norm": 0.1987679891780259, "learning_rate": 1.3206659138152749e-05, "log_odds_chosen": 0.0423583984375, "log_odds_ratio": -0.6748046875, "logits/chosen": 0.255035400390625, "logits/rejected": 0.29217529296875, "logps/chosen": -0.4224853515625, "logps/rejected": -0.437255859375, "loss": 9.5791, "nll_loss": 0.428955078125, "rewards/accuracies": 0.75, "rewards/chosen": -0.04224395751953125, "rewards/margins": 0.00146484375, "rewards/rejected": -0.04370880126953125, "step": 740 }, { "epoch": 0.40002699237465417, "grad_norm": 0.20176725203900253, "learning_rate": 1.3190508569845495e-05, "log_odds_chosen": 0.0196533203125, "log_odds_ratio": -0.6865234375, "logits/chosen": 0.1331329345703125, "logits/rejected": 0.268035888671875, "logps/chosen": -0.44891357421875, "logps/rejected": -0.44610595703125, "loss": 9.4683, "nll_loss": 0.45233154296875, "rewards/accuracies": 0.5625, "rewards/chosen": -0.044918060302734375, "rewards/margins": -0.000316619873046875, "rewards/rejected": -0.0446014404296875, "step": 741 }, { "epoch": 0.4005668398677374, "grad_norm": 0.18186628655280576, "learning_rate": 1.3174348730915994e-05, "log_odds_chosen": 0.0252685546875, "log_odds_ratio": -0.68212890625, "logits/chosen": 0.4471435546875, "logits/rejected": 0.5228958129882812, "logps/chosen": -0.4814453125, "logps/rejected": -0.4857177734375, "loss": 9.2319, "nll_loss": 0.4849853515625, "rewards/accuracies": 0.4375, "rewards/chosen": -0.0482025146484375, "rewards/margins": 0.0003509521484375, "rewards/rejected": -0.048553466796875, "step": 742 } ], "logging_steps": 1, "max_steps": 1853, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 371, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }